summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2020-12-03 15:42:38 +0100
committer Jon Bratseth <bratseth@gmail.com> 2020-12-03 15:42:38 +0100
commit edcd9b7ea4a5506c8bf669ae0f3e81a615796746 (patch)
tree 2775b9e95fa46433dee861c113c992a8c8dc4167 /node-repository
parent e65a3c0549b9858325d303ae4a0eb02055204155 (diff)
Record scaling event completion
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 18
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java 18
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java 25
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 20
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java 24
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java 47
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java 9
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java 22
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java 3
17 files changed, 168 insertions, 51 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 87a0684909c..35217cb4d05 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -183,6 +183,24 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
.findFirst());
}
+ /**
+ * Returns the cluster spec of the nodes in this, without any group designation
+ *
+ * @throws IllegalStateException if there are no nodes in this list or they do not all belong
+ * to the same cluster
+ */
+ public ClusterSpec clusterSpec() {
+ if (first().isEmpty()) throw new IllegalStateException("No nodes");
+ if (stream().anyMatch(node -> node.allocation().isEmpty()))
+ throw new IllegalStateException("Some nodes are not allocated to a cluster");
+
+ ClusterSpec firstNodeSpec = first().get().allocation().get().membership().cluster().with(Optional.empty());
+ if (stream().map(node -> node.allocation().get().membership().cluster().with(Optional.empty()))
+ .anyMatch(clusterSpec -> ! clusterSpec.equals(firstNodeSpec)))
+ throw new IllegalStateException("Nodes belong to multiple clusters");
+ return firstNodeSpec;
+ }
+
public ClusterResources toResources() {
if (isEmpty()) return new ClusterResources(0, 0, NodeResources.unspecified());
return new ClusterResources(size(),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 70d9215aa8f..b0e7c0bd61b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -257,6 +257,10 @@ public class NodeRepository extends AbstractComponent {
return NodeList.copyOf(getNodes(inState));
}
+ public NodeList list(ApplicationId application, State ... inState) {
+ return NodeList.copyOf(getNodes(application, inState));
+ }
+
/** Returns a filterable list of all nodes of an application */
public NodeList list(ApplicationId application) {
return NodeList.copyOf(getNodes(application));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index c996daf588b..92bc62229ed 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.applications;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -99,9 +100,16 @@ public class Cluster {
return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus);
}
+ /** Add or update (based on "at" time) a scaling event */
public Cluster with(ScalingEvent scalingEvent) {
List<ScalingEvent> scalingEvents = new ArrayList<>(this.scalingEvents);
- scalingEvents.add(scalingEvent);
+
+ int existingIndex = eventIndexAt(scalingEvent.at());
+ if (existingIndex >= 0)
+ scalingEvents.set(existingIndex, scalingEvent);
+ else
+ scalingEvents.add(scalingEvent);
+
prune(scalingEvents);
return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus);
}
@@ -130,4 +138,12 @@ public class Cluster {
scalingEvents.remove(0);
}
+ private int eventIndexAt(Instant at) {
+ for (int i = 0; i < scalingEvents.size(); i++) {
+ if (scalingEvents.get(i).at().equals(at))
+ return i;
+ }
+ return -1;
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
index 68e65d10d69..f4bd4e2020f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources;
import java.time.Instant;
import java.util.Objects;
+import java.util.Optional;
/**
* A recording of a change in resources for an application cluster
@@ -16,12 +17,19 @@ public class ScalingEvent {
private final ClusterResources from, to;
private final long generation;
private final Instant at;
+ private final Optional<Instant> completion;
- public ScalingEvent(ClusterResources from, ClusterResources to, long generation, Instant at) {
+ /** Do not use directly (needed when deserializing stored events): use {@link #create} to make new events */
+ public ScalingEvent(ClusterResources from,
+ ClusterResources to,
+ long generation,
+ Instant at,
+ Optional<Instant> completion) {
this.from = from;
this.to = to;
this.generation = generation;
this.at = at;
+ this.completion = completion;
}
/** Returns the resources we changed from */
@@ -36,6 +44,13 @@ public class ScalingEvent {
/** Returns the time of this deployment */
public Instant at() { return at; }
+ /** Returns the instant this completed, or empty if it is not yet complete as far as we know */
+ public Optional<Instant> completion() { return completion; }
+
+ public ScalingEvent withCompletion(Instant completion) {
+ return new ScalingEvent(from, to, generation, at, Optional.of(completion));
+ }
+
@Override
public int hashCode() { return Objects.hash(from, to, generation, at); }
@@ -48,12 +63,18 @@ public class ScalingEvent {
if ( ! other.at.equals(this.at)) return false;
if ( ! other.from.equals(this.from)) return false;
if ( ! other.to.equals(this.to)) return false;
+ if ( ! other.completion.equals(this.completion)) return false;
return true;
}
@Override
public String toString() {
- return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at;
+ return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at +
+ (completion.isPresent() ? " completed " + completion.get() : "");
+ }
+
+ public static ScalingEvent create(ClusterResources from, ClusterResources to, long generation, Instant at) {
+ return new ScalingEvent(from, to, generation, at, Optional.empty());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 66c6d68931c..0cb69c2defa 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
@@ -43,7 +44,7 @@ public class Autoscaler {
* @param clusterNodes the list of all the active nodes in a cluster
* @return scaling advice for this cluster
*/
- public Advice suggest(Cluster cluster, List<Node> clusterNodes) {
+ public Advice suggest(Cluster cluster, NodeList clusterNodes) {
return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive());
}
@@ -53,23 +54,22 @@ public class Autoscaler {
* @param clusterNodes the list of all the active nodes in a cluster
* @return scaling advice for this cluster
*/
- public Advice autoscale(Cluster cluster, List<Node> clusterNodes) {
+ public Advice autoscale(Cluster cluster, NodeList clusterNodes) {
if (cluster.minResources().equals(cluster.maxResources())) return Advice.none("Autoscaling is disabled"); // Shortcut
return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive());
}
- private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean exclusive) {
- ClusterSpec clusterSpec = clusterNodes.get(0).allocation().get().membership().cluster();
+ private Advice autoscale(Cluster cluster, NodeList clusterNodes, Limits limits, boolean exclusive) {
if ( ! stable(clusterNodes, nodeRepository))
return Advice.none("Cluster change in progress");
AllocatableClusterResources currentAllocation =
- new AllocatableClusterResources(clusterNodes, nodeRepository, cluster.exclusive());
+ new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive());
ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository);
int measurementsPerNode = clusterTimeseries.measurementsPerNode();
- if (measurementsPerNode < minimumMeasurementsPerNode(clusterSpec))
+ if (measurementsPerNode < minimumMeasurementsPerNode(clusterNodes.clusterSpec()))
return Advice.none("Collecting more data before making new scaling decisions" +
": Has " + measurementsPerNode + " data points per node" +
" (all: " + clusterTimeseries.measurementCount +
@@ -123,8 +123,8 @@ public class Autoscaler {
return ! targetTotal.justNumbers().satisfies(currentTotal.justNumbers());
}
- private boolean recentlyScaled(Cluster cluster, List<Node> clusterNodes) {
- Duration downscalingDelay = downscalingDelay(clusterNodes.get(0).allocation().get().membership().cluster());
+ private boolean recentlyScaled(Cluster cluster, NodeList clusterNodes) {
+ Duration downscalingDelay = downscalingDelay(clusterNodes.first().get().allocation().get().membership().cluster());
return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN)
.isAfter(nodeRepository.clock().instant().minus(downscalingDelay));
}
@@ -154,7 +154,7 @@ public class Autoscaler {
return Duration.ofHours(1);
}
- public static boolean stable(List<Node> nodes, NodeRepository nodeRepository) {
+ public static boolean stable(NodeList nodes, NodeRepository nodeRepository) {
// The cluster is processing recent changes
if (nodes.stream().anyMatch(node -> node.status().wantToRetire() ||
node.allocation().get().membership().retired() ||
@@ -162,7 +162,7 @@ public class Autoscaler {
return false;
// A deployment is ongoing
- if (nodeRepository.getNodes(nodes.get(0).allocation().get().owner(), Node.State.reserved).size() > 0)
+ if (nodeRepository.getNodes(nodes.first().get().allocation().get().owner(), Node.State.reserved).size() > 0)
return false;
return true;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index bf1960ad597..33f29b714c5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -1,8 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
-import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
@@ -17,7 +16,7 @@ import java.util.stream.Collectors;
*/
public class ClusterTimeseries {
- private final List<Node> clusterNodes;
+ private final NodeList clusterNodes;
final int measurementCount;
final int measurementCountWithoutStale;
@@ -25,13 +24,12 @@ public class ClusterTimeseries {
final int measurementCountWithoutStaleOutOfServiceUnstable;
/** The measurements for all nodes in this snapshot */
- private final List<NodeTimeseries> allNodeTimeseries;
+ private final List<NodeTimeseries> allTimeseries;
- public ClusterTimeseries(Cluster cluster, List<Node> clusterNodes, MetricsDb db, NodeRepository nodeRepository) {
+ public ClusterTimeseries(Cluster cluster, NodeList clusterNodes, MetricsDb db, NodeRepository nodeRepository) {
this.clusterNodes = clusterNodes;
- ClusterSpec clusterSpec = clusterNodes.get(0).allocation().get().membership().cluster();
- var timeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterSpec)),
- clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet()));
+ var timeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterNodes.clusterSpec())),
+ clusterNodes);
measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
@@ -46,24 +44,24 @@ public class ClusterTimeseries {
timeseries = filter(timeseries, snapshot -> snapshot.stable());
measurementCountWithoutStaleOutOfServiceUnstable = timeseries.stream().mapToInt(m -> m.size()).sum();
- this.allNodeTimeseries = timeseries;
+ this.allTimeseries = timeseries;
}
/** Returns the average number of measurements per node */
public int measurementsPerNode() {
- int measurementCount = allNodeTimeseries.stream().mapToInt(m -> m.size()).sum();
+ int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum();
return measurementCount / clusterNodes.size();
}
/** Returns the number of nodes measured in this */
public int nodesMeasured() {
- return allNodeTimeseries.size();
+ return allTimeseries.size();
}
/** Returns the average load of this resource in this */
public double averageLoad(Resource resource) {
- int measurementCount = allNodeTimeseries.stream().mapToInt(m -> m.size()).sum();
- double measurementSum = allNodeTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum();
+ int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum();
+ double measurementSum = allTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum();
return measurementSum / measurementCount;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java
index ea4ce4b44de..68acdcc88f7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java
@@ -2,6 +2,8 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.collections.Pair;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Clock;
@@ -9,6 +11,7 @@ import java.time.Instant;
import java.util.Collection;
import java.util.List;
import java.util.Set;
+import java.util.stream.Collectors;
/**
* An in-memory time-series database of node metrics.
@@ -26,6 +29,10 @@ public interface MetricsDb {
*/
List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames);
+ default List<NodeTimeseries> getNodeTimeseries(Instant startTime, NodeList nodes) {
+ return getNodeTimeseries(startTime, nodes.stream().map(Node::hostname).collect(Collectors.toSet()));
+ }
+
/** Must be called intermittently (as long as add is called) to gc old data */
void gc();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
index 5183eb1d628..7c5b839edf3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
@@ -64,7 +64,7 @@ public class MetricsResponse {
private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) {
ClusterSpec cluster = node.allocation().get().membership().cluster();
- return Autoscaler.stable(applicationNodes.cluster(cluster.id()).asList(), nodeRepository);
+ return Autoscaler.stable(applicationNodes.cluster(cluster.id()), nodeRepository);
}
private void consumeServiceMetrics(String hostname, Inspector node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 1197a01b9c7..e04dda2ca9c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -7,16 +7,21 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
+import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries;
+import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.status.ApplicationLock;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -30,6 +35,7 @@ import java.util.stream.Collectors;
public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
private final Autoscaler autoscaler;
+ private final MetricsDb metricsDb;
private final Deployer deployer;
private final Metric metric;
@@ -40,8 +46,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
Duration interval) {
super(nodeRepository, interval, metric);
this.autoscaler = new Autoscaler(metricsDb, nodeRepository);
- this.metric = metric;
+ this.metricsDb = metricsDb;
this.deployer = deployer;
+ this.metric = metric;
}
@Override
@@ -58,17 +65,18 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
private void autoscale(ApplicationId application, List<Node> applicationNodes) {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
if ( ! deployment.isValid()) return;
- nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes, deployment));
+ nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, NodeList.copyOf(clusterNodes), deployment));
}
}
private void autoscale(ApplicationId applicationId,
ClusterSpec.Id clusterId,
- List<Node> clusterNodes,
+ NodeList clusterNodes,
MaintenanceDeployment deployment) {
Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId));
if (application.cluster(clusterId).isEmpty()) return;
Cluster cluster = application.cluster(clusterId).get();
+ cluster = updateCompletion(cluster, clusterNodes);
var advice = autoscaler.autoscale(cluster, clusterNodes);
cluster = cluster.withAutoscalingStatus(advice.reason());
@@ -87,13 +95,38 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
return nodeRepository().applications();
}
+ /** Check if the last scaling event for this cluster has completed and if so record it in the returned instance */
+ private Cluster updateCompletion(Cluster cluster, NodeList clusterNodes) {
+ if (cluster.lastScalingEvent().isEmpty()) return cluster;
+ var event = cluster.lastScalingEvent().get();
+ if (event.completion().isPresent()) return cluster;
+
+ // Scaling event is complete if:
+ // - 1. no nodes which were retired by this are still present (which also implies data distribution is complete)
+ if (clusterNodes.retired().stream()
+ .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at())))
+ return cluster;
+
+ // - 2. all nodes have switched to the right config generation
+ for (NodeTimeseries nodeTimeseries : metricsDb.getNodeTimeseries(event.at(), clusterNodes)) {
+ Optional<MetricSnapshot> firstOnNewGeneration =
+ nodeTimeseries.asList().stream()
+ .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst();
+ if (firstOnNewGeneration.isEmpty()) return cluster; // Not completed
+ }
+
+
+ // Set the completion time to the instant we notice completion.
+ Instant completionTime = nodeRepository().clock().instant();
+ return cluster.with(event.withCompletion(completionTime));
+ }
+
private void logAutoscaling(ClusterResources target,
ApplicationId application,
Cluster cluster,
- List<Node> clusterNodes) {
- ClusterResources current = new AllocatableClusterResources(clusterNodes, nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources();
- ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type();
- log.info("Autoscaling " + application + " " + clusterType + " " + cluster.id() + ":" +
+ NodeList clusterNodes) {
+ ClusterResources current = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources();
+ log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" +
"\nfrom " + toString(current) + "\nto " + toString(target));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index 3546c8d8afb..7cb0270636f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Applications;
@@ -51,7 +52,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
private int suggest(ApplicationId application, List<Node> applicationNodes) {
int successes = 0;
for (var cluster : nodesByCluster(applicationNodes).entrySet())
- successes += suggest(application, cluster.getKey(), cluster.getValue()) ? 1 : 0;
+ successes += suggest(application, cluster.getKey(), NodeList.copyOf(cluster.getValue())) ? 1 : 0;
return successes;
}
@@ -61,7 +62,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
private boolean suggest(ApplicationId applicationId,
ClusterSpec.Id clusterId,
- List<Node> clusterNodes) {
+ NodeList clusterNodes) {
Application application = applications().get(applicationId).orElse(new Application(applicationId));
Optional<Cluster> cluster = application.cluster(clusterId);
if (cluster.isEmpty()) return true;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
index e92415d6538..3c2541bac27 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
@@ -39,6 +39,13 @@ public class History {
/** Returns this event if it is present in this history */
public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); }
+ /** Returns true if a given event is registered in this history at the given time */
+ public boolean hasEventAt(Event.Type type, Instant time) {
+ return event(type)
+ .map(event -> event.at().equals(time))
+ .orElse(false);
+ }
+
/** Returns true if a given event is registered in this history after the given time */
public boolean hasEventAfter(Event.Type type, Instant time) {
return event(type)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
index 3979b898145..4b9b14656ca 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
@@ -52,6 +52,7 @@ public class ApplicationSerializer {
private static final String toKey = "to";
private static final String generationKey = "generation";
private static final String atKey = "at";
+ private static final String completionKey = "completion";
public static byte[] toJson(Application application) {
Slime slime = new Slime();
@@ -140,13 +141,19 @@ public class ApplicationSerializer {
toSlime(event.to(), object.setObject(toKey));
object.setLong(generationKey, event.generation());
object.setLong(atKey, event.at().toEpochMilli());
+ event.completion().ifPresent(completion -> object.setLong(completionKey, completion.toEpochMilli()));
}
private static ScalingEvent scalingEventFromSlime(Inspector inspector) {
return new ScalingEvent(clusterResourcesFromSlime(inspector.field(fromKey)),
clusterResourcesFromSlime(inspector.field(toKey)),
inspector.field(generationKey).asLong(),
- Instant.ofEpochMilli(inspector.field(atKey).asLong()));
+ Instant.ofEpochMilli(inspector.field(atKey).asLong()),
+ optionalInstant(inspector.field(completionKey)));
+ }
+
+ private static Optional<Instant> optionalInstant(Inspector inspector) {
+ return inspector.valid() ? Optional.of(Instant.ofEpochMilli(inspector.asLong())) : Optional.empty();
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index 3cae4a5a5ea..e5d19527f2d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
@@ -60,6 +61,7 @@ class Activator {
* while holding the node repository lock on this application
*/
private void activateNodes(Collection<HostSpec> hosts, long generation, ApplicationTransaction transaction) {
+ Instant activationTime = nodeRepository.clock().instant(); // Use one timestamp for all activation changes
ApplicationId application = transaction.application();
Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet());
NodeList allNodes = nodeRepository.list();
@@ -69,7 +71,7 @@ class Activator {
List<Node> reservedToActivate = updatePortsFrom(hosts, retainHostsInList(hostnames, reserved));
List<Node> oldActive = applicationNodes.state(Node.State.active).asList(); // All nodes active now
List<Node> continuedActive = retainHostsInList(hostnames, oldActive);
- List<Node> newActive = updateFrom(hosts, continuedActive); // All nodes that will be active when this is committed
+ List<Node> newActive = updateFrom(hosts, continuedActive, activationTime); // All nodes that will be active when this is committed
newActive.addAll(reservedToActivate);
if ( ! containsAll(hostnames, newActive))
throw new IllegalArgumentException("Activation of " + application + " failed. " +
@@ -84,16 +86,16 @@ class Activator {
List<Node> activeToRemove = removeHostsFromList(hostnames, oldActive);
activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired. TODO: Move this line to deactivate
- nodeRepository.deactivate(activeToRemove, transaction);
+ nodeRepository.deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line
nodeRepository.activate(newActive, transaction.nested()); // activate also continued active to update node state
- rememberResourceChange(transaction, generation,
+ rememberResourceChange(transaction, generation, activationTime,
NodeList.copyOf(oldActive).not().retired(),
NodeList.copyOf(newActive).not().retired());
unreserveParentsOf(reservedToActivate);
}
- private void rememberResourceChange(ApplicationTransaction transaction, long generation,
+ private void rememberResourceChange(ApplicationTransaction transaction, long generation, Instant at,
NodeList oldNodes, NodeList newNodes) {
Optional<Application> application = nodeRepository.applications().get(transaction.application());
if (application.isEmpty()) return; // infrastructure app, hopefully :-|
@@ -106,10 +108,10 @@ class Activator {
var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources();
if ( ! previousResources.equals(currentResources)) {
modified = modified.with(application.get().cluster(clusterEntry.getKey()).get()
- .with(new ScalingEvent(previousResources,
- currentResources,
- generation,
- nodeRepository.clock().instant())));
+ .with(ScalingEvent.create(previousResources,
+ currentResources,
+ generation,
+ at)));
}
}
@@ -202,11 +204,11 @@ class Activator {
}
/** Returns the input nodes with the changes resulting from applying the settings in hosts to the given list of nodes. */
- private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes) {
+ private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes, Instant at) {
List<Node> updated = new ArrayList<>();
for (Node node : nodes) {
HostSpec hostSpec = getHost(node.hostname(), hosts);
- node = hostSpec.membership().get().retired() ? node.retire(nodeRepository.clock().instant()) : node.unretire();
+ node = hostSpec.membership().get().retired() ? node.retire(at) : node.unretire();
if (! hostSpec.advertisedResources().equals(node.resources())) // A resized node
node = node.with(new Flavor(hostSpec.advertisedResources()));
Allocation allocation = node.allocation().get()
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
index 9332eb79f20..0d423333ce1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
@@ -59,7 +59,7 @@ public class AutoscalingIntegrationTest {
tester.nodeRepository().applications().put(application, lock);
}
var scaledResources = autoscaler.suggest(application.clusters().get(cluster1.id()),
- tester.nodeRepository().getNodes(application1));
+ tester.nodeRepository().list(application1));
assertTrue(scaledResources.isPresent());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index e6a921d055e..43302c4fe23 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -204,7 +204,7 @@ class AutoscalingTester {
nodeRepository().applications().put(application, lock);
}
return autoscaler.autoscale(application.clusters().get(clusterId),
- nodeRepository().getNodes(applicationId, Node.State.active));
+ nodeRepository().list(applicationId, Node.State.active));
}
public Autoscaler.Advice suggest(ApplicationId applicationId, ClusterSpec.Id clusterId,
@@ -215,7 +215,7 @@ class AutoscalingTester {
nodeRepository().applications().put(application, lock);
}
return autoscaler.suggest(application.clusters().get(clusterId),
- nodeRepository().getNodes(applicationId, Node.State.active));
+ nodeRepository().list(applicationId, Node.State.active));
}
public ClusterResources assertResources(String message,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index b51f653ecc0..c8f1c499854 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -14,6 +14,7 @@ import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
+import java.util.Optional;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -90,6 +91,7 @@ public class AutoscalingMaintainerTest {
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents();
assertEquals(2, events.size());
+ assertEquals(Optional.of(firstMaintenanceTime), events.get(0).completion());
assertEquals(2, events.get(1).from().nodes());
assertEquals(4, events.get(1).to().nodes());
assertEquals(1, events.get(1).generation());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
index e63f31cf304..06473e60712 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java
@@ -45,7 +45,8 @@ public class ApplicationSerializerTest {
List.of(new ScalingEvent(new ClusterResources(10, 5, minResources),
new ClusterResources(12, 6, minResources),
7L,
- Instant.ofEpochMilli(12345L))),
+ Instant.ofEpochMilli(12345L),
+ Optional.of(Instant.ofEpochMilli(67890L)))),
"Autoscaling status"));
Application original = new Application(ApplicationId.from("myTenant", "myApplication", "myInstance"),
clusters);