diff options
Diffstat (limited to 'node-repository')
61 files changed, 1427 insertions, 573 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 87a0684909c..5c635551692 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -183,7 +183,28 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { .findFirst()); } + /** + * Returns the cluster spec of the nodes in this, without any group designation + * + * @throws IllegalStateException if there are no nodes in thus list or they do not all belong + * to the same cluster + */ + public ClusterSpec clusterSpec() { + ensureSingleCluster(); + if (isEmpty()) throw new IllegalStateException("No nodes"); + return first().get().allocation().get().membership().cluster().with(Optional.empty()); + } + + /** + * Returns the resources of the nodes of this. + * + * NOTE: If the nodes do not all have the same values of node resources, a random pick among those node resources + * will be returned. + * + * @throws IllegalStateException if the nodes in this do not all belong to the same cluster + */ public ClusterResources toResources() { + ensureSingleCluster(); if (isEmpty()) return new ClusterResources(0, 0, NodeResources.unspecified()); return new ClusterResources(size(), (int)stream().map(node -> node.allocation().get().membership().cluster().group().get()) @@ -192,6 +213,18 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { first().get().resources()); } + private void ensureSingleCluster() { + if (isEmpty()) return; + + if (stream().anyMatch(node -> node.allocation().isEmpty())) + throw new IllegalStateException("Some nodes are not allocated to a cluster"); + + ClusterSpec firstNodeSpec = first().get().allocation().get().membership().cluster().with(Optional.empty()); + if (stream().map(node -> node.allocation().get().membership().cluster().with(Optional.empty())) + .anyMatch(clusterSpec -> ! clusterSpec.equals(firstNodeSpec))) + throw new IllegalStateException("Nodes belong to multiple clusters"); + } + /** Returns the nodes of this as a stream */ public Stream<Node> stream() { return asList().stream(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 86795767710..b0e7c0bd61b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -257,6 +257,10 @@ public class NodeRepository extends AbstractComponent { return NodeList.copyOf(getNodes(inState)); } + public NodeList list(ApplicationId application, State ... inState) { + return NodeList.copyOf(getNodes(application, inState)); + } + /** Returns a filterable list of all nodes of an application */ public NodeList list(ApplicationId application) { return NodeList.copyOf(getNodes(application)); @@ -884,11 +888,15 @@ public class NodeRepository extends AbstractComponent { } public boolean canAllocateTenantNodeTo(Node host) { + return canAllocateTenantNodeTo(host, zone.getCloud().dynamicProvisioning()); + } + + public static boolean canAllocateTenantNodeTo(Node host, boolean dynamicProvisioning) { if ( ! host.type().canRun(NodeType.tenant)) return false; if (host.status().wantToRetire()) return false; if (host.allocation().map(alloc -> alloc.membership().retired()).orElse(false)) return false; - if (zone.getCloud().dynamicProvisioning()) + if (dynamicProvisioning) return EnumSet.of(State.active, State.ready, State.provisioned).contains(host.state()); else return host.state() == State.active; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 90133f7499e..92bc62229ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -3,9 +3,9 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; +import java.time.Instant; +import java.util.ArrayList; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -19,11 +19,15 @@ import java.util.Optional; */ public class Cluster { + public static final int maxScalingEvents = 15; + private final ClusterSpec.Id id; private final boolean exclusive; private final ClusterResources min, max; private final Optional<ClusterResources> suggested; private final Optional<ClusterResources> target; + + /** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */ private final List<ScalingEvent> scalingEvents; private final String autoscalingStatus; @@ -45,7 +49,7 @@ public class Cluster { this.target = Optional.empty(); else this.target = targetResources; - this.scalingEvents = scalingEvents; + this.scalingEvents = List.copyOf(scalingEvents); this.autoscalingStatus = autoscalingStatus; } @@ -96,9 +100,18 @@ public class Cluster { return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } + /** Add or update (based on "at" time) a scaling event */ public Cluster with(ScalingEvent scalingEvent) { - // NOTE: We're just storing the latest scaling event so far - return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent), autoscalingStatus); + List<ScalingEvent> scalingEvents = new ArrayList<>(this.scalingEvents); + + int existingIndex = eventIndexAt(scalingEvent.at()); + if (existingIndex >= 0) + scalingEvents.set(existingIndex, scalingEvent); + else + scalingEvents.add(scalingEvent); + + prune(scalingEvents); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } public Cluster withAutoscalingStatus(String autoscalingStatus) { @@ -120,4 +133,17 @@ public class Cluster { return "cluster '" + id + "'"; } + private void prune(List<ScalingEvent> scalingEvents) { + while (scalingEvents.size() > maxScalingEvents) + scalingEvents.remove(0); + } + + private int eventIndexAt(Instant at) { + for (int i = 0; i < scalingEvents.size(); i++) { + if (scalingEvents.get(i).at().equals(at)) + return i; + } + return -1; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java index 68e65d10d69..745fdcd736d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java @@ -3,8 +3,10 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; +import java.time.Duration; import java.time.Instant; import java.util.Objects; +import java.util.Optional; /** * A recording of a change in resources for an application cluster @@ -16,12 +18,19 @@ public class ScalingEvent { private final ClusterResources from, to; private final long generation; private final Instant at; + private final Optional<Instant> completion; - public ScalingEvent(ClusterResources from, ClusterResources to, long generation, Instant at) { + /** Do not use */ + public ScalingEvent(ClusterResources from, + ClusterResources to, + long generation, + Instant at, + Optional<Instant> completion) { this.from = from; this.to = to; this.generation = generation; this.at = at; + this.completion = completion; } /** Returns the resources we changed from */ @@ -36,6 +45,19 @@ public class ScalingEvent { /** Returns the time of this deployment */ public Instant at() { return at; } + /** Returns the instant this completed, or empty if it is not yet complete as far as we know */ + public Optional<Instant> completion() { return completion; } + + /** Returns the time this event took to completion, or empty if it's not yet complete */ + public Optional<Duration> duration() { + if (completion.isEmpty()) return Optional.empty(); + return Optional.of(Duration.between(at, completion.get())); + } + + public ScalingEvent withCompletion(Instant completion) { + return new ScalingEvent(from, to, generation, at, Optional.of(completion)); + } + @Override public int hashCode() { return Objects.hash(from, to, generation, at); } @@ -48,12 +70,18 @@ public class ScalingEvent { if ( ! other.at.equals(this.at)) return false; if ( ! other.from.equals(this.from)) return false; if ( ! other.to.equals(this.to)) return false; + if ( ! other.completion.equals(this.completion)) return false; return true; } @Override public String toString() { - return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at; + return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at + + (completion.isPresent() ? " completed " + completion.get() : ""); + } + + public static ScalingEvent create(ClusterResources from, ClusterResources to, long generation, Instant at) { + return new ScalingEvent(from, to, generation, at, Optional.empty()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index b1213b2da41..9eb4b796970 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -26,20 +26,19 @@ public class AllocatableClusterResources { private final NodeResources realResources; private final NodeResources advertisedResources; - private final ClusterSpec.Type clusterType; + private final ClusterSpec clusterSpec; private final double fulfilment; /** Fake allocatable resources from requested capacity */ public AllocatableClusterResources(ClusterResources requested, - ClusterSpec.Type clusterType, - boolean exclusive, + ClusterSpec clusterSpec, NodeRepository nodeRepository) { this.nodes = requested.nodes(); this.groups = requested.groups(); - this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), exclusive); + this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), clusterSpec.isExclusive()); this.advertisedResources = requested.nodeResources(); - this.clusterType = clusterType; + this.clusterSpec = clusterSpec; this.fulfilment = 1; } @@ -48,19 +47,19 @@ public class AllocatableClusterResources { this.groups = (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); this.realResources = averageRealResourcesOf(nodes, nodeRepository, exclusive); // Average since we average metrics over nodes this.advertisedResources = nodes.get(0).resources(); - this.clusterType = nodes.get(0).allocation().get().membership().cluster().type(); + this.clusterSpec = nodes.get(0).allocation().get().membership().cluster(); this.fulfilment = 1; } public AllocatableClusterResources(ClusterResources realResources, NodeResources advertisedResources, NodeResources idealResources, - ClusterSpec.Type clusterType) { + ClusterSpec clusterSpec) { this.nodes = realResources.nodes(); this.groups = realResources.groups(); this.realResources = realResources.nodeResources(); this.advertisedResources = advertisedResources; - this.clusterType = clusterType; + this.clusterSpec = clusterSpec; this.fulfilment = fulfilment(realResources.nodeResources(), idealResources); } @@ -88,7 +87,7 @@ public class AllocatableClusterResources { return (int)Math.ceil((double)nodes / groups); } - public ClusterSpec.Type clusterType() { return clusterType; } + public ClusterSpec clusterSpec() { return clusterSpec; } public double cost() { return nodes * advertisedResources.cost(); } @@ -133,23 +132,23 @@ public class AllocatableClusterResources { } public static Optional<AllocatableClusterResources> from(ClusterResources wantedResources, - boolean exclusive, - ClusterSpec.Type clusterType, + ClusterSpec clusterSpec, Limits applicationLimits, NodeRepository nodeRepository) { var systemLimits = new NodeResourceLimits(nodeRepository); - if ( !exclusive && !nodeRepository.zone().getCloud().dynamicProvisioning()) { + boolean exclusive = clusterSpec.isExclusive(); + if ( !clusterSpec.isExclusive() && !nodeRepository.zone().getCloud().dynamicProvisioning()) { // We decide resources: Add overhead to what we'll request (advertised) to make sure real becomes (at least) cappedNodeResources NodeResources advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive); - advertisedResources = systemLimits.enlargeToLegal(advertisedResources, clusterType, exclusive); // Attempt to ask for something legal + advertisedResources = systemLimits.enlargeToLegal(advertisedResources, clusterSpec.type(), exclusive); // Attempt to ask for something legal advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // ... thus, what we really get may change - if ( ! systemLimits.isWithinRealLimits(realResources, clusterType)) return Optional.empty(); + if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) return Optional.empty(); if (matchesAny(nodeRepository.flavors().getFlavors(), advertisedResources)) return Optional.of(new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources.nodeResources(), - clusterType)); + clusterSpec)); else return Optional.empty(); } @@ -172,11 +171,11 @@ public class AllocatableClusterResources { } if ( ! between(applicationLimits.min().nodeResources(), applicationLimits.max().nodeResources(), advertisedResources)) continue; - if ( ! systemLimits.isWithinRealLimits(realResources, clusterType)) continue; + if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) continue; var candidate = new AllocatableClusterResources(wantedResources.with(realResources), advertisedResources, wantedResources.nodeResources(), - clusterType); + clusterSpec); if (best.isEmpty() || candidate.preferableTo(best.get())) best = Optional.of(candidate); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index e57011b0e4a..fb97e803a35 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -58,7 +58,7 @@ public class AllocationOptimizer { groups, nodeResourcesWith(nodesAdjustedForRedundancy, groupsAdjustedForRedundancy, limits, current, target)); - var allocatableResources = AllocatableClusterResources.from(next, exclusive, current.clusterType(), limits, nodeRepository); + var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, nodeRepository); if (allocatableResources.isEmpty()) continue; if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) bestAllocation = allocatableResources; @@ -79,7 +79,7 @@ public class AllocationOptimizer { int groupSize = nodes / groups; - if (current.clusterType().isContent()) { // load scales with node share of content + if (current.clusterSpec().isStateful()) { // load scales with node share of content // The fixed cost portion of cpu does not scale with changes to the node count // TODO: Only for the portion of cpu consumed by queries double cpuPerGroup = fixedCpuCostFraction * target.nodeCpu() + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index d2c943794fe..79383c056f9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -5,18 +5,18 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import java.time.Duration; import java.time.Instant; -import java.util.List; import java.util.Objects; import java.util.Optional; /** - * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster - * based on observed behavior. + * The autoscaler gives advice about what resources should be allocated to a cluster based on observed behavior. * * @author bratseth */ @@ -44,7 +44,7 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Advice suggest(Cluster cluster, List<Node> clusterNodes) { + public Advice suggest(Cluster cluster, NodeList clusterNodes) { return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive()); } @@ -54,30 +54,34 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Advice autoscale(Cluster cluster, List<Node> clusterNodes) { - if (cluster.minResources().equals(cluster.maxResources())) return Advice.none("Autoscaling is disabled"); // Shortcut + public Advice autoscale(Cluster cluster, NodeList clusterNodes) { + if (cluster.minResources().equals(cluster.maxResources())) return Advice.none("Autoscaling is not enabled"); return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()); } - private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean exclusive) { - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - if (unstable(clusterNodes, nodeRepository)) + private Advice autoscale(Cluster cluster, NodeList clusterNodes, Limits limits, boolean exclusive) { + if ( ! stable(clusterNodes, nodeRepository)) return Advice.none("Cluster change in progress"); - AllocatableClusterResources currentAllocation = - new AllocatableClusterResources(clusterNodes, nodeRepository, cluster.exclusive()); + Duration scalingWindow = scalingWindow(clusterNodes.clusterSpec(), cluster); + if (scaledIn(scalingWindow, cluster)) + return Advice.dontScale("Won't autoscale now: Less than " + scalingWindow + " since last rescaling"); - ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository); + ClusterTimeseries clusterTimeseries = + new ClusterTimeseries(nodeRepository.clock().instant().minus(scalingWindow), cluster, clusterNodes, metricsDb); + AllocatableClusterResources currentAllocation = + new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive()); int measurementsPerNode = clusterTimeseries.measurementsPerNode(); - if (measurementsPerNode < minimumMeasurementsPerNode(clusterType)) - return Advice.none("Collecting more data before making new scaling decisions" + - ": Has " + measurementsPerNode + " data points per node"); + if (measurementsPerNode < minimumMeasurementsPerNode(scalingWindow)) + return Advice.none("Collecting more data before making new scaling decisions: " + + "Have " + measurementsPerNode + " measurements per node but require " + + minimumMeasurementsPerNode(scalingWindow)); int nodesMeasured = clusterTimeseries.nodesMeasured(); if (nodesMeasured != clusterNodes.size()) - return Advice.none("Collecting more data before making new scaling decisions" + - ": Has measurements from " + nodesMeasured + " but need from " + clusterNodes.size()); + return Advice.none("Collecting more data before making new scaling decisions: " + + "Have measurements from " + nodesMeasured + " but require from " + clusterNodes.size()); double cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); double memoryLoad = clusterTimeseries.averageLoad(Resource.memory); @@ -91,9 +95,10 @@ public class Autoscaler { return Advice.dontScale("No allocation changes are possible within configured limits"); if (similar(bestAllocation.get(), currentAllocation)) - return Advice.dontScale("Cluster is ideally scaled (within configured limits)"); - if (isDownscaling(bestAllocation.get(), currentAllocation) && recentlyScaled(cluster, clusterNodes)) - return Advice.dontScale("Waiting a while before scaling down"); + return Advice.dontScale("Cluster is ideally scaled within configured limits"); + + if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(scalingWindow.multipliedBy(3), cluster)) + return Advice.dontScale("Waiting " + scalingWindow.multipliedBy(3) + " since last rescaling before reducing resources"); return Advice.scaleTo(bestAllocation.get().toAdvertisedClusterResources()); } @@ -120,49 +125,64 @@ public class Autoscaler { return ! targetTotal.justNumbers().satisfies(currentTotal.justNumbers()); } - private boolean recentlyScaled(Cluster cluster, List<Node> clusterNodes) { - Duration downscalingDelay = downscalingDelay(clusterNodes.get(0).allocation().get().membership().cluster().type()); + private boolean scaledIn(Duration delay, Cluster cluster) { return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) - .isAfter(nodeRepository.clock().instant().minus(downscalingDelay)); + .isAfter(nodeRepository.clock().instant().minus(delay)); } /** The duration of the window we need to consider to make a scaling decision. See also minimumMeasurementsPerNode */ - static Duration scalingWindow(ClusterSpec.Type clusterType) { - if (clusterType.isContent()) return Duration.ofHours(12); - return Duration.ofMinutes(30); - } + private Duration scalingWindow(ClusterSpec clusterSpec, Cluster cluster) { + int completedEventCount = 0; + Duration totalDuration = Duration.ZERO; + for (ScalingEvent event : cluster.scalingEvents()) { + if (event.duration().isEmpty()) continue; + completedEventCount++; + totalDuration = totalDuration.plus(event.duration().get()); + } - static Duration maxScalingWindow() { - return Duration.ofHours(12); + if (completedEventCount == 0) { // Use defaults + if (clusterSpec.isStateful()) return Duration.ofHours(12); + return Duration.ofMinutes(10); + } + else { + Duration predictedDuration = totalDuration.dividedBy(completedEventCount); + + // TODO: Remove when we have reliable completion for content clusters + if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) + return Duration.ofHours(12); + + if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum + return predictedDuration; + } } - /** Measurements are currently taken once a minute. See also scalingWindow */ - static int minimumMeasurementsPerNode(ClusterSpec.Type clusterType) { - if (clusterType.isContent()) return 60; - return 7; + static Duration maxScalingWindow() { + return Duration.ofHours(48); } - /** - * We should wait a while before scaling down after a scaling event as a peak in usage - * indicates more peaks may arrive in the near future. - */ - static Duration downscalingDelay(ClusterSpec.Type clusterType) { - if (clusterType.isContent()) return Duration.ofHours(12); - return Duration.ofHours(1); + /** Returns the minimum measurements per node (average) we require to give autoscaling advice.*/ + private int minimumMeasurementsPerNode(Duration scalingWindow) { + // Measurements are ideally taken every minute, but no guarantees + // (network, nodes may be down, collecting is single threaded and may take longer than 1 minute to complete). + // Since the metric window is 5 minutes, we won't really improve from measuring more often. + long minimumMeasurements = scalingWindow.toMinutes() / 5; + minimumMeasurements = Math.round(0.8 * minimumMeasurements); // Allow 20% metrics collection blackout + if (minimumMeasurements < 1) minimumMeasurements = 1; + return (int)minimumMeasurements; } - public static boolean unstable(List<Node> nodes, NodeRepository nodeRepository) { + public static boolean stable(NodeList nodes, NodeRepository nodeRepository) { // The cluster is processing recent changes if (nodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || node.allocation().get().isRemovable())) - return true; + return false; // A deployment is ongoing - if (nodeRepository.getNodes(nodes.get(0).allocation().get().owner(), Node.State.reserved).size() > 0) - return true; + if (nodeRepository.getNodes(nodes.first().get().allocation().get().owner(), Node.State.reserved).size() > 0) + return false; - return false; + return true; } public static class Advice { @@ -197,6 +217,13 @@ public class Autoscaler { private static Advice scaleTo(ClusterResources target) { return new Advice(Optional.of(target), true, "Scaling due to load changes"); } + + @Override + public String toString() { + return "autoscaling advice: " + + (present ? (target.isPresent() ? "Scale to " + target.get() : "Don't scale") : " None"); + } + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index e325e797ca5..e1a3ceca033 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -1,15 +1,13 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Instant; -import java.util.HashMap; import java.util.List; -import java.util.Map; +import java.util.function.Predicate; import java.util.stream.Collectors; /** @@ -19,64 +17,38 @@ import java.util.stream.Collectors; */ public class ClusterTimeseries { - private final List<Node> clusterNodes; + private final NodeList clusterNodes; - /** The measurements for all hosts in this snapshot */ - private final List<NodeTimeseries> nodeTimeseries; + /** The measurements for all nodes in this snapshot */ + private final List<NodeTimeseries> allTimeseries; - public ClusterTimeseries(Cluster cluster, List<Node> clusterNodes, MetricsDb db, NodeRepository nodeRepository) { + public ClusterTimeseries(Instant startTime, Cluster cluster, NodeList clusterNodes, MetricsDb db) { this.clusterNodes = clusterNodes; - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - var allTimeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), - clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet())); - Map<String, Instant> startTimePerNode = metricStartTimes(cluster, clusterNodes, allTimeseries, nodeRepository); - nodeTimeseries = filterStale(allTimeseries, startTimePerNode); - } + var timeseries = db.getNodeTimeseries(startTime, clusterNodes); - /** - * Returns the instant of the oldest metric to consider for each node, or an empty map if metrics from the - * entire (max) window should be considered. - */ - private Map<String, Instant> metricStartTimes(Cluster cluster, - List<Node> clusterNodes, - List<NodeTimeseries> nodeTimeseries, - NodeRepository nodeRepository) { - Map<String, Instant> startTimePerHost = new HashMap<>(); - if ( ! cluster.scalingEvents().isEmpty()) { - var deployment = cluster.scalingEvents().get(cluster.scalingEvents().size() - 1); - for (Node node : clusterNodes) { - startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise - var nodeGenerationMeasurements = - nodeTimeseries.stream().filter(m -> m.hostname().equals(node.hostname())).findAny(); - if (nodeGenerationMeasurements.isPresent()) { - var firstMeasurementOfCorrectGeneration = - nodeGenerationMeasurements.get().asList().stream() - .filter(m -> m.generation() >= deployment.generation()) - .findFirst(); - if (firstMeasurementOfCorrectGeneration.isPresent()) { - startTimePerHost.put(node.hostname(), firstMeasurementOfCorrectGeneration.get().at()); - } - } - } - } - return startTimePerHost; + if (cluster.lastScalingEvent().isPresent()) + timeseries = filter(timeseries, snapshot -> snapshot.generation() < 0 || // Content nodes do not yet send generation + snapshot.generation() >= cluster.lastScalingEvent().get().generation()); + timeseries = filter(timeseries, snapshot -> snapshot.inService() && snapshot.stable()); + + this.allTimeseries = timeseries; } /** Returns the average number of measurements per node */ public int measurementsPerNode() { - int measurementCount = nodeTimeseries.stream().mapToInt(m -> m.size()).sum(); + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); return measurementCount / clusterNodes.size(); } /** Returns the number of nodes measured in this */ public int nodesMeasured() { - return nodeTimeseries.size(); + return allTimeseries.size(); } /** Returns the average load of this resource in this */ public double averageLoad(Resource resource) { - int measurementCount = nodeTimeseries.stream().mapToInt(m -> m.size()).sum(); - double measurementSum = nodeTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + double measurementSum = allTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); return measurementSum / measurementCount; } @@ -89,10 +61,8 @@ public class ClusterTimeseries { } } - private List<NodeTimeseries> filterStale(List<NodeTimeseries> timeseries, - Map<String, Instant> startTimePerHost) { - if (startTimePerHost.isEmpty()) return timeseries; // Map is either empty or complete - return timeseries.stream().map(m -> m.justAfter(startTimePerHost.get(m.hostname()))).collect(Collectors.toList()); + private List<NodeTimeseries> filter(List<NodeTimeseries> timeseries, Predicate<MetricSnapshot> filter) { + return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.filter(filter)).collect(Collectors.toList()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java index 6f6cd862c33..d5072475cd9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java @@ -8,9 +8,8 @@ import java.time.Instant; * * @author bratseth */ -public class MetricSnapshot { +public class MetricSnapshot implements Comparable<MetricSnapshot> { - // TODO: Order by timestamp private final Instant at; private final double cpu; @@ -18,28 +17,43 @@ public class MetricSnapshot { private final double disk; private final long generation; private final boolean inService; + private final boolean stable; - public MetricSnapshot(Instant at, double cpu, double memory, double disk, long generation, boolean inService) { + public MetricSnapshot(Instant at, double cpu, double memory, double disk, long generation, + boolean inService, boolean stable) { this.at = at; this.cpu = cpu; this.memory = memory; this.disk = disk; this.generation = generation; this.inService = inService; + this.stable = stable; } public Instant at() { return at; } public double cpu() { return cpu; } public double memory() { return memory; } public double disk() { return disk; } + + /** The configuration generation at the time of this measurement, or -1 if not known */ public long generation() { return generation; } + public boolean inService() { return inService; } + public boolean stable() { return stable; } + + @Override + public int compareTo(MetricSnapshot other) { + return at.compareTo(other.at); + } @Override public String toString() { return "metrics at " + at + ":" + " cpu: " + cpu + " memory: " + memory + " disk: " + disk + - " generation: " + generation; } + " generation: " + generation + + " inService: " + inService + + " stable: " + stable; + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java index ea4ce4b44de..68acdcc88f7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Clock; @@ -9,6 +11,7 @@ import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** * An in-memory time-series database of node metrics. @@ -26,6 +29,10 @@ public interface MetricsDb { */ List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames); + default List<NodeTimeseries> getNodeTimeseries(Instant startTime, NodeList nodes) { + return getNodeTimeseries(startTime, nodes.stream().map(Node::hostname).collect(Collectors.toSet())); + } + /** Must be called intermittently (as long as add is called) to gc old data */ void gc(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index 4471d267416..7c5b839edf3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -2,17 +2,24 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.Slime; import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Optional; /** * Consumes a response from the metrics/v2 API and populates the fields of this with the resulting values @@ -23,33 +30,41 @@ public class MetricsResponse { private final Collection<Pair<String, MetricSnapshot>> nodeMetrics = new ArrayList<>(); - public MetricsResponse(String response) { - this(SlimeUtils.jsonToSlime(response)); + public MetricsResponse(String response, NodeList applicationNodes, NodeRepository nodeRepository) { + this(SlimeUtils.jsonToSlime(response), applicationNodes, nodeRepository); } public Collection<Pair<String, MetricSnapshot>> metrics() { return nodeMetrics; } - private MetricsResponse(Slime response) { + private MetricsResponse(Slime response, NodeList applicationNodes, NodeRepository nodeRepository) { Inspector root = response.get(); Inspector nodes = root.field("nodes"); - nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node)); + nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node, applicationNodes, nodeRepository)); } - private void consumeNode(Inspector node) { + private void consumeNode(Inspector node, NodeList applicationNodes, NodeRepository nodeRepository) { String hostname = node.field("hostname").asString(); - consumeNodeMetrics(hostname, node.field("node")); + consumeNodeMetrics(hostname, node.field("node"), applicationNodes, nodeRepository); // consumeServiceMetrics(hostname, node.field("services")); } - private void consumeNodeMetrics(String hostname, Inspector node) { - long timestampSecond = node.field("timestamp").asLong(); - Map<String, Double> values = consumeMetrics(node.field("metrics")); + private void consumeNodeMetrics(String hostname, Inspector nodeData, NodeList applicationNodes, NodeRepository nodeRepository) { + Optional<Node> node = applicationNodes.stream().filter(n -> n.hostname().equals(hostname)).findAny(); + if (node.isEmpty()) return; // Node is not part of this cluster any more + long timestampSecond = nodeData.field("timestamp").asLong(); + Map<String, Double> values = consumeMetrics(nodeData.field("metrics")); nodeMetrics.add(new Pair<>(hostname, new MetricSnapshot(Instant.ofEpochMilli(timestampSecond * 1000), Metric.cpu.from(values), Metric.memory.from(values), Metric.disk.from(values), (long)Metric.generation.from(values), - Metric.inService.from(values) > 0))); + Metric.inService.from(values) > 0, + clusterIsStable(node.get(), applicationNodes, nodeRepository)))); + } + + private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) { + ClusterSpec cluster = node.allocation().get().membership().cluster(); + return Autoscaler.stable(applicationNodes.cluster(cluster.id()), nodeRepository); } private void consumeServiceMetrics(String hostname, Inspector node) { @@ -86,6 +101,7 @@ public class MetricsResponse { generation { // application config generation active on the node public String metricResponseName() { return "application_generation"; } double convertValue(double metricValue) { return (float)metricValue; } // Really a long + double defaultValue() { return -1.0; } }, inService { public String metricResponseName() { return "in_service"; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java index 33bdc746678..4afc876056a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java @@ -55,9 +55,6 @@ public class MetricsV2MetricsFetcher extends AbstractComponent implements Metric public Collection<Pair<String, MetricSnapshot>> fetchMetrics(ApplicationId application) { NodeList applicationNodes = nodeRepository.list(application).state(Node.State.active); - // Do not try to draw conclusions from utilization while unstable - if (Autoscaler.unstable(applicationNodes.asList(), nodeRepository)) return Collections.emptyList(); - Optional<Node> metricsV2Container = applicationNodes.container() .matching(node -> expectedUp(node)) .stream() @@ -65,8 +62,7 @@ public class MetricsV2MetricsFetcher extends AbstractComponent implements Metric if (metricsV2Container.isEmpty()) return Collections.emptyList(); // Consumer 'autoscaling' defined in com.yahoo.vespa.model.admin.monitoring.MetricConsumer String url = "http://" + metricsV2Container.get().hostname() + ":" + 4080 + apiPath + "?consumer=autoscaling"; - String response = httpClient.get(url); - return new MetricsResponse(response).metrics(); + return new MetricsResponse(httpClient.get(url), applicationNodes, nodeRepository).metrics(); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java index 6cba3928b8f..24876609f58 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java @@ -1,16 +1,15 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ClusterSpec; - import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.function.Predicate; import java.util.stream.Collectors; /** - * A list of metric snapshots from a host + * A list of metric snapshots from a node, sorted by increasing time (newest last). * * @author bratseth */ @@ -19,10 +18,11 @@ public class NodeTimeseries { private final String hostname; private final List<MetricSnapshot> snapshots; - // Note: This transfers ownership of the snapshot list to this NodeTimeseries(String hostname, List<MetricSnapshot> snapshots) { this.hostname = hostname; - this.snapshots = snapshots; + List<MetricSnapshot> sortedSnapshots = new ArrayList<>(snapshots); + Collections.sort(sortedSnapshots); + this.snapshots = Collections.unmodifiableList(sortedSnapshots); } public boolean isEmpty() { return snapshots.isEmpty(); } @@ -31,7 +31,7 @@ public class NodeTimeseries { public MetricSnapshot get(int index) { return snapshots.get(index); } - public List<MetricSnapshot> asList() { return Collections.unmodifiableList(snapshots); } + public List<MetricSnapshot> asList() { return snapshots; } public String hostname() { return hostname; } @@ -41,6 +41,10 @@ public class NodeTimeseries { return new NodeTimeseries(hostname(), list); } + public NodeTimeseries filter(Predicate<MetricSnapshot> filter) { + return new NodeTimeseries(hostname, snapshots.stream().filter(filter).collect(Collectors.toList())); + } + public NodeTimeseries justAfter(Instant oldestTime) { return new NodeTimeseries(hostname, snapshots.stream() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java index bf013810b2f..8e90294b4a5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java @@ -4,10 +4,12 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.google.inject.Inject; import com.yahoo.collections.ListMap; import com.yahoo.collections.Pair; +import com.yahoo.component.AbstractComponent; import com.yahoo.io.IOUtils; import com.yahoo.vespa.defaults.Defaults; import io.questdb.cairo.CairoConfiguration; import io.questdb.cairo.CairoEngine; +import io.questdb.cairo.CairoException; import io.questdb.cairo.DefaultCairoConfiguration; import io.questdb.cairo.TableWriter; import io.questdb.cairo.sql.Record; @@ -40,14 +42,14 @@ import java.util.stream.Collectors; * * @author bratseth */ -public class QuestMetricsDb implements MetricsDb { +public class QuestMetricsDb extends AbstractComponent implements MetricsDb { private static final Logger log = Logger.getLogger(QuestMetricsDb.class.getName()); - private static final String tableName = "metrics"; + private static final String table = "metrics"; private final Clock clock; private final String dataDir; - private final CairoEngine engine; + private CairoEngine engine; private long highestTimestampAdded = 0; @@ -63,8 +65,11 @@ public class QuestMetricsDb implements MetricsDb { && ! new File(Defaults.getDefaults().vespaHome()).exists()) dataDir = "data"; // We're injected, but not on a node with Vespa installed this.dataDir = dataDir; + initializeDb(); + } - IOUtils.createDirectory(dataDir + "/" + tableName); + private void initializeDb() { + IOUtils.createDirectory(dataDir + "/" + table); // silence Questdb's custom logging system IOUtils.writeFile(new File(dataDir, "quest-log.conf"), new byte[0]); @@ -73,28 +78,43 @@ public class QuestMetricsDb implements MetricsDb { CairoConfiguration configuration = new DefaultCairoConfiguration(dataDir); engine = new CairoEngine(configuration); - ensureExists(tableName); + ensureExists(table); } @Override public void add(Collection<Pair<String, MetricSnapshot>> snapshots) { - try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), tableName)) { - for (var snapshot : snapshots) { - long atMillis = adjustIfRecent(snapshot.getSecond().at().toEpochMilli(), highestTimestampAdded); - if (atMillis < highestTimestampAdded) continue; // Ignore old data - highestTimestampAdded = atMillis; - TableWriter.Row row = writer.newRow(atMillis * 1000); // in microseconds - row.putStr(0, snapshot.getFirst()); - row.putFloat(2, (float)snapshot.getSecond().cpu()); - row.putFloat(3, (float)snapshot.getSecond().memory()); - row.putFloat(4, (float)snapshot.getSecond().disk()); - row.putLong(5, snapshot.getSecond().generation()); - row.append(); + try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), table)) { + add(snapshots, writer); + } + catch (CairoException e) { + if (e.getMessage().contains("Cannot read offset")) { + // This error seems non-recoverable + repair(e); + try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), table)) { + add(snapshots, writer); + } } - writer.commit(); } } + private void add(Collection<Pair<String, MetricSnapshot>> snapshots, TableWriter writer) { + for (var snapshot : snapshots) { + long atMillis = adjustIfRecent(snapshot.getSecond().at().toEpochMilli(), highestTimestampAdded); + if (atMillis < highestTimestampAdded) continue; // Ignore old data + highestTimestampAdded = atMillis; + TableWriter.Row row = writer.newRow(atMillis * 1000); // in microseconds + row.putStr(0, snapshot.getFirst()); + row.putFloat(2, (float)snapshot.getSecond().cpu()); + row.putFloat(3, (float)snapshot.getSecond().memory()); + row.putFloat(4, (float)snapshot.getSecond().disk()); + row.putLong(5, snapshot.getSecond().generation()); + row.putBool(6, snapshot.getSecond().inService()); + row.putBool(7, snapshot.getSecond().stable()); + row.append(); + } + writer.commit(); + } + @Override public List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames) { try (SqlCompiler compiler = new SqlCompiler(engine)) { @@ -116,7 +136,7 @@ public class QuestMetricsDb implements MetricsDb { SqlExecutionContext context = newContext(); int partitions = 0; try (SqlCompiler compiler = new SqlCompiler(engine)) { - File tableRoot = new File(dataDir, tableName); + File tableRoot = new File(dataDir, table); List<String> removeList = new ArrayList<>(); for (String dirEntry : tableRoot.list()) { File partitionDir = new File(tableRoot, dirEntry); @@ -131,7 +151,7 @@ public class QuestMetricsDb implements MetricsDb { } // Remove unless all partitions are old: Removing all partitions "will be supported in the future" if ( removeList.size() < partitions && ! removeList.isEmpty()) - compiler.compile("alter table " + tableName + " drop partition " + + compiler.compile("alter table " + table + " drop partition " + removeList.stream().map(dir -> "'" + dir + "'").collect(Collectors.joining(",")), context); } @@ -141,27 +161,78 @@ public class QuestMetricsDb implements MetricsDb { } @Override + public void deconstruct() { close(); } + + @Override public void close() { if (engine != null) engine.close(); } - private void ensureExists(String tableName) { + /** + * Repairs this db on corruption. + * + * @param e the exception indicating corruption + */ + private void repair(Exception e) { + log.log(Level.WARNING, "QuestDb seems corrupted, wiping data and starting over", e); + IOUtils.recursiveDeleteDir(new File(dataDir)); + initializeDb(); + } + + private void ensureExists(String table) { SqlExecutionContext context = newContext(); - if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), tableName)) return; + if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), table)) { // table exists + ensureUpdated(table, context); + } else { + create(table, context); + } + } + private void ensureUpdated(String table, SqlExecutionContext context) { try (SqlCompiler compiler = new SqlCompiler(engine)) { - compiler.compile("create table " + tableName + - " (hostname string, at timestamp, cpu_util float, mem_total_util float, disk_util float, application_generation long)" + + if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), table)) { + ensureColumnExists("inService", "boolean", table, compiler, context); // TODO: Remove after December 2020 + ensureColumnExists("stable", "boolean", table, compiler, context); // TODO: Remove after December 2020 + } + } catch (SqlException e) { + repair(e); + } + } + + private void create(String table, SqlExecutionContext context) { + try (SqlCompiler compiler = new SqlCompiler(engine)) { + compiler.compile("create table " + table + + " (hostname string, at timestamp, cpu_util float, mem_total_util float, disk_util float," + + " application_generation long, inService boolean, stable boolean)" + " timestamp(at)" + "PARTITION BY DAY;", context); - // We should do this if we get a version where selecting on stringhs work embedded, see below + // We should do this if we get a version where selecting on strings work embedded, see below // compiler.compile("alter table " + tableName + " alter column hostname add index", context); } catch (SqlException e) { - throw new IllegalStateException("Could not create Quest db table '" + tableName + "'", e); + throw new IllegalStateException("Could not create Quest db table '" + table + "'", e); + } + } + + private void ensureColumnExists(String column, String columnType, + String table, SqlCompiler compiler, SqlExecutionContext context) throws SqlException { + if (columnNamesOf(table, compiler, context).contains(column)) return; + compiler.compile("alter table " + table + " add column " + column + " " + columnType, context); + } + + private List<String> columnNamesOf(String tableName, SqlCompiler compiler, SqlExecutionContext context) throws SqlException { + List<String> columns = new ArrayList<>(); + try (RecordCursorFactory factory = compiler.compile("show columns from " + tableName, context).getRecordCursorFactory()) { + try (RecordCursor cursor = factory.getCursor(context)) { + Record record = cursor.getRecord(); + while (cursor.hasNext()) { + columns.add(record.getStr(0).toString()); + } + } } + return columns; } private long adjustIfRecent(long timestamp, long highestTimestampAdded) { @@ -182,7 +253,7 @@ public class QuestMetricsDb implements MetricsDb { DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME.withZone(ZoneId.of("UTC")); String from = formatter.format(startTime).substring(0, 19) + ".000000Z"; String to = formatter.format(clock.instant()).substring(0, 19) + ".000000Z"; - String sql = "select * from " + tableName + " where at in('" + from + "', '" + to + "');"; + String sql = "select * from " + table + " where at in('" + from + "', '" + to + "');"; // WHERE clauses does not work: // String sql = "select * from " + tableName + " where hostname in('host1', 'host2', 'host3');"; @@ -200,7 +271,8 @@ public class QuestMetricsDb implements MetricsDb { record.getFloat(3), record.getFloat(4), record.getLong(5), - true)); + record.getBool(6), + record.getBool(7))); } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 5df45bbc1b1..9ac1ca2b4c1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -95,9 +95,14 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer { } @Override - public void close() { - super.close(); + public void shutdown() { + super.shutdown(); this.deploymentExecutor.shutdownNow(); + } + + @Override + public void awaitShutdown() { + super.awaitShutdown(); try { // Give deployments in progress some time to complete this.deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 809c54146d0..c4744f6cb6a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -7,15 +7,21 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries; +import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.orchestrator.status.ApplicationLock; import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -29,6 +35,7 @@ import java.util.stream.Collectors; public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; + private final MetricsDb metricsDb; private final Deployer deployer; private final Metric metric; @@ -39,8 +46,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { Duration interval) { super(nodeRepository, interval, metric); this.autoscaler = new Autoscaler(metricsDb, nodeRepository); - this.metric = metric; + this.metricsDb = metricsDb; this.deployer = deployer; + this.metric = metric; } @Override @@ -57,44 +65,69 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private void autoscale(ApplicationId application, List<Node> applicationNodes) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) { if ( ! deployment.isValid()) return; - nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes, deployment)); + nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, NodeList.copyOf(clusterNodes), deployment)); } } private void autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, - List<Node> clusterNodes, + NodeList clusterNodes, MaintenanceDeployment deployment) { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)); - Optional<Cluster> cluster = application.cluster(clusterId); - if (cluster.isEmpty()) return; - - var advice = autoscaler.autoscale(cluster.get(), clusterNodes); - - application = application.with(cluster.get().withAutoscalingStatus(advice.reason())); - if (advice.isEmpty()) { - applications().put(application, deployment.applicationLock().get()); - } - else if ( ! cluster.get().targetResources().equals(advice.target())) { - applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get()); + if (application.cluster(clusterId).isEmpty()) return; + Cluster cluster = application.cluster(clusterId).get(); + cluster = updateCompletion(cluster, clusterNodes); + var advice = autoscaler.autoscale(cluster, clusterNodes); + cluster = cluster.withAutoscalingStatus(advice.reason()); + + if (advice.isPresent() && !cluster.targetResources().equals(advice.target())) { // autoscale + cluster = cluster.withTarget(advice.target()); + applications().put(application.with(cluster), deployment.applicationLock().get()); if (advice.target().isPresent()) { - logAutoscaling(advice.target().get(), applicationId, cluster.get(), clusterNodes); + logAutoscaling(advice.target().get(), applicationId, cluster, clusterNodes); deployment.activate(); } } + else { // store cluster update + applications().put(application.with(cluster), deployment.applicationLock().get()); + } } private Applications applications() { return nodeRepository().applications(); } + /** Check if the last scaling event for this cluster has completed and if so record it in the returned instance */ + private Cluster updateCompletion(Cluster cluster, NodeList clusterNodes) { + if (cluster.lastScalingEvent().isEmpty()) return cluster; + var event = cluster.lastScalingEvent().get(); + if (event.completion().isPresent()) return cluster; + + // Scaling event is complete if: + // - 1. no nodes which was retired by this are still present (which also implies data distribution is complete) + if (clusterNodes.retired().stream() + .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at()))) + return cluster; + // - 2. all nodes have switched to the right config generation + for (NodeTimeseries nodeTimeseries : metricsDb.getNodeTimeseries(event.at(), clusterNodes)) { + Optional<MetricSnapshot> firstOnNewGeneration = + nodeTimeseries.asList().stream() + .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst(); + if (firstOnNewGeneration.isEmpty()) return cluster; // Not completed + } + + + // Set the completion time to the instant we notice completion. + Instant completionTime = nodeRepository().clock().instant(); + return cluster.with(event.withCompletion(completionTime)); + } + private void logAutoscaling(ClusterResources target, ApplicationId application, Cluster cluster, - List<Node> clusterNodes) { - ClusterResources current = new AllocatableClusterResources(clusterNodes, nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources(); - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - log.info("Autoscaling " + application + " " + clusterType + " " + cluster.id() + ":" + + NodeList clusterNodes) { + ClusterResources current = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources(); + log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" + "\nfrom " + toString(current) + "\nto " + toString(target)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 0195466b689..e271d5dcd11 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -2,36 +2,48 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; +import com.yahoo.component.Vtag; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterMembership; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.OutOfCapacityException; import com.yahoo.jdisc.Metric; +import com.yahoo.lang.MutableInteger; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.JacksonFlag; import com.yahoo.vespa.flags.ListFlag; -import com.yahoo.vespa.flags.custom.HostCapacity; +import com.yahoo.vespa.flags.PermanentFlags; +import com.yahoo.vespa.flags.custom.ClusterCapacity; +import com.yahoo.vespa.flags.custom.SharedHost; +import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; +import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate; +import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator; +import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; import com.yahoo.yolean.Exceptions; import javax.naming.NameNotFoundException; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.logging.Level; @@ -48,7 +60,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { private static final Logger log = Logger.getLogger(DynamicProvisioningMaintainer.class.getName()); private final HostProvisioner hostProvisioner; - private final ListFlag<HostCapacity> targetCapacityFlag; + private final ListFlag<ClusterCapacity> preprovisionCapacityFlag; + private final JacksonFlag<SharedHost> sharedHostFlag; DynamicProvisioningMaintainer(NodeRepository nodeRepository, Duration interval, @@ -57,7 +70,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { Metric metric) { super(nodeRepository, interval, metric); this.hostProvisioner = hostProvisioner; - this.targetCapacityFlag = Flags.TARGET_CAPACITY.bindTo(flagSource); + this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource); + this.sharedHostFlag = PermanentFlags.SHARED_HOST.bindTo(flagSource); } @Override @@ -104,8 +118,17 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { /** Converge zone to wanted capacity */ private void convergeToCapacity(NodeList nodes) { - List<NodeResources> capacity = targetCapacity(); - List<Node> excessHosts = provision(capacity, nodes); + List<Node> excessHosts; + try { + excessHosts = provision(nodes); + } catch (OutOfCapacityException | IllegalStateException e) { + log.log(Level.WARNING, "Failed to provision preprovision capacity and/or find excess hosts: " + e.getMessage()); + return; // avoid removing excess hosts + } catch (RuntimeException e) { + log.log(Level.WARNING, "Failed to provision preprovision capacity and/or find excess hosts", e); + return; // avoid removing excess hosts + } + excessHosts.forEach(host -> { try { hostProvisioner.deprovision(host); @@ -119,72 +142,182 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { /** * Provision hosts to ensure there is room to allocate spare nodes. * - * @param advertisedSpareCapacity the advertised resources of the spare nodes - * @param nodes list of all nodes + * @param nodeList list of all nodes * @return excess hosts that can safely be deprovisioned: An excess host 1. contains no nodes allocated * to an application, and assuming the spare nodes have been allocated, and 2. is not parked * without wantToDeprovision (which means an operator is looking at the node). */ - private List<Node> provision(List<NodeResources> advertisedSpareCapacity, NodeList nodes) { - Map<String, Node> hostsByHostname = new HashMap<>(nodes.hosts().asList().stream() + private List<Node> provision(NodeList nodeList) { + final List<Node> nodes = new ArrayList<>(provisionUntilNoDeficit(nodeList)); + + + Map<String, Node> sharedHosts = new HashMap<>(findSharedHosts(nodeList)); + + int minCount = sharedHostFlag.value().getMinCount(); + int deficit = minCount - sharedHosts.size(); + if (deficit > 0) { + provisionHosts(deficit, NodeResources.unspecified()) + .forEach(host -> { + sharedHosts.put(host.hostname(), host); + nodes.add(host); + }); + } + + return candidatesForRemoval(nodes).stream() + .sorted(Comparator.comparing(node -> node.history().events().stream() + .map(History.Event::at).min(Comparator.naturalOrder()).orElseGet(() -> Instant.MIN))) + .filter(node -> { + if (!sharedHosts.containsKey(node.hostname()) || sharedHosts.size() > minCount) { + sharedHosts.remove(node.hostname()); + return true; + } else { + return false; + } + }) + .collect(Collectors.toList()); + } + + private List<Node> candidatesForRemoval(List<Node> nodes) { + Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() + .filter(node -> node.type() == NodeType.host) .filter(host -> host.state() != Node.State.parked || host.status().wantToDeprovision()) .collect(Collectors.toMap(Node::hostname, Function.identity()))); - nodes.asList().stream() + nodes.stream() .filter(node -> node.allocation().isPresent()) .flatMap(node -> node.parentHostname().stream()) .distinct() .forEach(hostsByHostname::remove); - List<Node> excessHosts = new ArrayList<>(hostsByHostname.values()); - - var capacity = new ArrayList<>(advertisedSpareCapacity); - for (Iterator<NodeResources> it = capacity.iterator(); it.hasNext() && !excessHosts.isEmpty(); ) { - NodeResources resources = it.next(); - excessHosts.stream() - .filter(nodeRepository()::canAllocateTenantNodeTo) - .filter(host -> nodeRepository().resourcesCalculator() - .advertisedResourcesOf(host.flavor()) - .satisfies(resources)) - .min(Comparator.comparingInt(n -> n.flavor().cost())) - .ifPresent(host -> { - excessHosts.remove(host); - it.remove(); - }); + return List.copyOf(hostsByHostname.values()); + } + + private Map<String, Node> findSharedHosts(NodeList nodeList) { + return nodeList.stream() + .filter(node -> NodeRepository.canAllocateTenantNodeTo(node, true)) + .filter(node -> node.reservedTo().isEmpty()) + .filter(node -> node.exclusiveTo().isEmpty()) + .collect(Collectors.toMap(Node::hostname, Function.identity())); + } + + /** + * @return the nodes in {@code nodeList} plus all hosts provisioned, plus all preprovision capacity + * nodes that were allocated. + * @throws OutOfCapacityException if there were problems provisioning hosts, and in case message + * should be sufficient (avoid no stack trace) + * @throws IllegalStateException if there was an algorithmic problem, and in case message + * should be sufficient (avoid no stack trace). + */ + private List<Node> provisionUntilNoDeficit(NodeList nodeList) { + List<ClusterCapacity> preprovisionCapacity = preprovisionCapacityFlag.value(); + + // Worst-case each ClusterCapacity in preprovisionCapacity will require an allocation. + int maxProvisions = preprovisionCapacity.size(); + + var nodesPlusProvisioned = new ArrayList<>(nodeList.asList()); + for (int numProvisions = 0;; ++numProvisions) { + var nodesPlusProvisionedPlusAllocated = new ArrayList<>(nodesPlusProvisioned); + Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated); + if (deficit.isEmpty()) { + return nodesPlusProvisionedPlusAllocated; + } + + if (numProvisions >= maxProvisions) { + throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting"); + } + + nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get()))); } + } - // Pre-provisioning is best effort, do one host at a time - capacity.forEach(resources -> { - try { - Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); - List<Node> hosts = hostProvisioner.provisionHosts(nodeRepository().database().getProvisionIndexes(1), - resources, ApplicationId.defaultId(), osVersion, - HostSharing.shared) - .stream() - .map(ProvisionedHost::generateHost) - .collect(Collectors.toList()); - nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer); - } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) { - log.log(Level.WARNING, "Failed to pre-provision " + resources + ": " + e.getMessage()); - } catch (RuntimeException e) { - log.log(Level.WARNING, "Failed to pre-provision " + resources + ", will retry in " + interval(), e); + private List<Node> provisionHosts(int count, NodeResources nodeResources) { + try { + Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); + List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(count); + List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, nodeResources, + ApplicationId.defaultId(), osVersion, HostSharing.shared) + .stream() + .map(ProvisionedHost::generateHost) + .collect(Collectors.toList()); + nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer); + return hosts; + } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) { + throw new OutOfCapacityException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage()); + } catch (RuntimeException e) { + throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e); + } + } + + /** + * Try to allocate the preprovision cluster capacity. + * + * @param mutableNodes represents all nodes in the node repo. As preprovision capacity is virtually allocated + * they are added to {@code mutableNodes} + * @return the part of a cluster capacity it was unable to allocate, if any + */ + private Optional<ClusterCapacity> allocatePreprovisionCapacity(List<ClusterCapacity> preprovisionCapacity, + ArrayList<Node> mutableNodes) { + for (int clusterIndex = 0; clusterIndex < preprovisionCapacity.size(); ++clusterIndex) { + ClusterCapacity clusterCapacity = preprovisionCapacity.get(clusterIndex); + LockedNodeList nodeList = new LockedNodeList(mutableNodes, () -> {}); + List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, nodeList); + int deficit = Math.max(0, clusterCapacity.count() - candidates.size()); + if (deficit > 0) { + return Optional.of(clusterCapacity.withCount(deficit)); } - }); - return excessHosts; + // Simulate allocating the cluster + mutableNodes.addAll(candidates); + } + + return Optional.empty(); } + private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList nodeList) { + NodeResources nodeResources = toNodeResources(clusterCapacity); + + // We'll allocate each ClusterCapacity as a unique cluster in a dummy application + ApplicationId applicationId = ApplicationId.defaultId(); + ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex)); + ClusterSpec clusterSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId) + // build() requires a version, even though it is not (should not be) used + .vespaVersion(Vtag.currentVersion) + .build(); + NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), nodeResources, false, true); + int wantedGroups = 1; + + NodePrioritizer prioritizer = new NodePrioritizer(nodeList, applicationId, clusterSpec, nodeSpec, wantedGroups, + true, nodeRepository().nameResolver(), nodeRepository().resourcesCalculator(), + nodeRepository().spareCount()); + List<NodeCandidate> nodeCandidates = prioritizer.collect(List.of()); + MutableInteger index = new MutableInteger(0); + return nodeCandidates + .stream() + .limit(clusterCapacity.count()) + .map(candidate -> candidate.toNode() + .allocate(applicationId, + ClusterMembership.from(clusterSpec, index.next()), + nodeResources, + nodeRepository().clock().instant())) + .collect(Collectors.toList()); + + } + + private static NodeResources toNodeResources(ClusterCapacity clusterCapacity) { + return new NodeResources(clusterCapacity.vcpu(), clusterCapacity.memoryGb(), clusterCapacity.diskGb(), + clusterCapacity.bandwidthGbps()); + } /** Reads node resources declared by target capacity flag */ private List<NodeResources> targetCapacity() { - return targetCapacityFlag.value().stream() - .flatMap(cap -> { - NodeResources resources = new NodeResources(cap.getVcpu(), cap.getMemoryGb(), - cap.getDiskGb(), 1); - return IntStream.range(0, cap.getCount()).mapToObj(i -> resources); - }) - .sorted(NodeResourceComparator.memoryDiskCpuOrder().reversed()) - .collect(Collectors.toList()); + return preprovisionCapacityFlag.value().stream() + .flatMap(cap -> { + NodeResources resources = new NodeResources(cap.vcpu(), cap.memoryGb(), + cap.diskGb(), cap.bandwidthGbps()); + return IntStream.range(0, cap.count()).mapToObj(i -> resources); + }) + .sorted(NodeResourceComparator.memoryDiskCpuOrder().reversed()) + .collect(Collectors.toList()); } /** Verify DNS configuration of given nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index ee55e22e89c..62d042d5e8b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -177,7 +177,7 @@ class MaintenanceDeployment implements Closeable { if ( deployment.activate().isEmpty()) return false; log.info(agent + " redeployed " + application + " to " + - ( verifyTarget ? this : "move " + (node.hostname() + " from " + fromHost))); + ( verifyTarget ? this : "move " + (node + " from " + fromHost.hostname()))); return true; } finally { @@ -225,7 +225,7 @@ class MaintenanceDeployment implements Closeable { @Override public String toString() { return "move " + - ( isEmpty() ? "none" : (node.hostname() + " from " + fromHost + " to " + toHost)); + ( isEmpty() ? "none" : (node + " from " + fromHost.hostname() + " to " + toHost.hostname())); } public static Move empty() { return new Move(null, null, null); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index d2dcaaeae5b..4a5c28fe0c8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; @@ -26,10 +27,12 @@ import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -41,6 +44,7 @@ import static com.yahoo.config.provision.NodeResources.DiskSpeed.any; */ public class MetricsReporter extends NodeRepositoryMaintainer { + private final Set<Pair<Metric.Context, String>> nonZeroMetrics = new HashSet<>(); private final Metric metric; private final Orchestrator orchestrator; private final ServiceMonitor serviceMonitor; @@ -261,28 +265,39 @@ public class MetricsReporter extends NodeRepositoryMaintainer { .forEach((lockPath, lockMetrics) -> { Metric.Context context = getContext(Map.of("lockPath", lockPath)); - metric.set("lockAttempt.acquire", lockMetrics.getAndResetAcquireCount(), context); - metric.set("lockAttempt.acquireFailed", lockMetrics.getAndResetAcquireFailedCount(), context); - metric.set("lockAttempt.acquireTimedOut", lockMetrics.getAndResetAcquireTimedOutCount(), context); - metric.set("lockAttempt.locked", lockMetrics.getAndResetAcquireSucceededCount(), context); - metric.set("lockAttempt.release", lockMetrics.getAndResetReleaseCount(), context); - metric.set("lockAttempt.releaseFailed", lockMetrics.getAndResetReleaseFailedCount(), context); - metric.set("lockAttempt.reentry", lockMetrics.getAndResetReentryCount(), context); - metric.set("lockAttempt.deadlock", lockMetrics.getAndResetDeadlockCount(), context); - metric.set("lockAttempt.nakedRelease", lockMetrics.getAndResetNakedReleaseCount(), context); - metric.set("lockAttempt.acquireWithoutRelease", lockMetrics.getAndResetAcquireWithoutReleaseCount(), context); - metric.set("lockAttempt.foreignRelease", lockMetrics.getAndResetForeignReleaseCount(), context); - - setLockLatencyMetrics("acquire", lockMetrics.getAndResetAcquireLatencyMetrics(), context); - setLockLatencyMetrics("locked", lockMetrics.getAndResetLockedLatencyMetrics(), context); + LatencyMetrics acquireLatencyMetrics = lockMetrics.getAndResetAcquireLatencyMetrics(); + setNonZero("lockAttempt.acquireMaxActiveLatency", acquireLatencyMetrics.maxActiveLatencySeconds(), context); + setNonZero("lockAttempt.acquireHz", acquireLatencyMetrics.startHz(), context); + setNonZero("lockAttempt.acquireLoad", acquireLatencyMetrics.load(), context); + + LatencyMetrics lockedLatencyMetrics = lockMetrics.getAndResetLockedLatencyMetrics(); + setNonZero("lockAttempt.lockedLatency", lockedLatencyMetrics.maxLatencySeconds(), context); + setNonZero("lockAttempt.lockedLoad", lockedLatencyMetrics.load(), context); + + setNonZero("lockAttempt.acquireTimedOut", lockMetrics.getAndResetAcquireTimedOutCount(), context); + setNonZero("lockAttempt.deadlock", lockMetrics.getAndResetDeadlockCount(), context); + + // bucket for various rare errors - to reduce #metrics + setNonZero("lockAttempt.errors", + lockMetrics.getAndResetAcquireFailedCount() + + lockMetrics.getAndResetReleaseFailedCount() + + lockMetrics.getAndResetNakedReleaseCount() + + lockMetrics.getAndResetAcquireWithoutReleaseCount() + + lockMetrics.getAndResetForeignReleaseCount(), + context); }); } - private void setLockLatencyMetrics(String name, LatencyMetrics latencyMetrics, Metric.Context context) { - metric.set("lockAttempt." + name + "Latency", latencyMetrics.latencySeconds(), context); - metric.set("lockAttempt." + name + "MaxActiveLatency", latencyMetrics.maxActiveLatencySeconds(), context); - metric.set("lockAttempt." + name + "Hz", latencyMetrics.startHz(), context); - metric.set("lockAttempt." + name + "Load", latencyMetrics.load(), context); + private void setNonZero(String key, Number value, Metric.Context context) { + var metricKey = new Pair<>(context, key); + if (Double.compare(value.doubleValue(), 0.0) != 0) { + metric.set(key, value, context); + nonZeroMetrics.add(metricKey); + } else if (nonZeroMetrics.remove(metricKey)) { + // Need to set the metric to 0 after it has been set to non-zero, to avoid carrying + // a non-zero 'last' from earlier periods. + metric.set(key, value, context); + } } private void updateDockerMetrics(NodeList nodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 6d571fada9e..2999655e5fa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -153,7 +153,9 @@ public class NodeFailer extends NodeRepositoryMaintainer { Map<Node, String> nodesByFailureReason = new HashMap<>(); for (Node node : activeNodes) { if (node.history().hasEventBefore(History.Event.Type.down, graceTimeEnd) && ! applicationSuspended(node)) { - nodesByFailureReason.put(node, "Node has been down longer than " + downTimeLimit); + // Allow a grace period after node re-activation + if ( ! node.history().hasEventAfter(History.Event.Type.activated, graceTimeEnd)) + nodesByFailureReason.put(node, "Node has been down longer than " + downTimeLimit); } else if (hostSuspended(node, activeNodes)) { Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().getNode(parent)).orElse(node); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java index ff7bc9393bd..017e1264f1c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -1,23 +1,18 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.yolean.Exceptions; import java.time.Duration; -import java.util.Collection; import java.util.logging.Level; -import java.util.stream.Collectors; /** - * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all - * active nodes. + * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all active nodes. * * @author bratseth */ @@ -43,12 +38,12 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { int warnings = 0; for (ApplicationId application : activeNodesByApplication().keySet()) { try { - metricsDb.add(filter(metricsFetcher.fetchMetrics(application))); + metricsDb.add(metricsFetcher.fetchMetrics(application)); } catch (Exception e) { // TODO: Don't warn if this only happens occasionally if (warnings++ < maxWarningsPerInvocation) - log.log(Level.WARNING, "Could not update metrics for " + application + ": " + Exceptions.toMessageString(e)); + log.log(Level.WARNING, "Could not update metrics for " + application + ": " + Exceptions.toMessageString(e), e); } } metricsDb.gc(); @@ -59,11 +54,4 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { return warnings == 0; } - /** Filter out uninformative snapshots before storing */ - private Collection<Pair<String, MetricSnapshot>> filter(Collection<Pair<String, MetricSnapshot>> snapshots) { - return snapshots.stream() - .filter(snapshot -> snapshot.getSecond().inService()) - .collect(Collectors.toList()); - } - } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index 214872348a3..96d10415e63 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -3,8 +3,8 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.IntFlag; +import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; @@ -32,7 +32,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer { NodeRebooter(NodeRepository nodeRepository, FlagSource flagSource, Metric metric) { super(nodeRepository, Duration.ofMinutes(25), metric); - this.rebootIntervalInDays = Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource); + this.rebootIntervalInDays = PermanentFlags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource); this.random = new Random(nodeRepository.clock().millis()); // seed with clock for test determinism } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 9f2c6f05b00..ac33cc2441c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.maintenance.Maintainer; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostLivenessTracker; @@ -18,7 +19,8 @@ import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; -import java.util.Optional; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; /** * A component which sets up all the node repo maintenance jobs. @@ -27,28 +29,7 @@ import java.util.Optional; */ public class NodeRepositoryMaintenance extends AbstractComponent { - private final NodeFailer nodeFailer; - private final NodeHealthTracker nodeHealthTracker; - private final PeriodicApplicationMaintainer periodicApplicationMaintainer; - private final OperatorChangeApplicationMaintainer operatorChangeApplicationMaintainer; - private final ReservationExpirer reservationExpirer; - private final InactiveExpirer inactiveExpirer; - private final RetiredExpirer retiredExpirer; - private final FailedExpirer failedExpirer; - private final DirtyExpirer dirtyExpirer; - private final ProvisionedExpirer provisionedExpirer; - private final NodeRebooter nodeRebooter; - private final MetricsReporter metricsReporter; - private final InfrastructureProvisioner infrastructureProvisioner; - private final Optional<LoadBalancerExpirer> loadBalancerExpirer; - private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer; - private final SpareCapacityMaintainer spareCapacityMaintainer; - private final OsUpgradeActivator osUpgradeActivator; - private final Rebalancer rebalancer; - private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; - private final AutoscalingMaintainer autoscalingMaintainer; - private final ScalingSuggestionsMaintainer scalingSuggestionsMaintainer; - private final SwitchRebalancer switchRebalancer; + private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @SuppressWarnings("unused") @Inject @@ -59,60 +40,45 @@ public class NodeRepositoryMaintenance extends AbstractComponent { MetricsFetcher metricsFetcher, MetricsDb metricsDb) { DefaultTimes defaults = new DefaultTimes(zone, deployer); - nodeFailer = new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric); - nodeHealthTracker = new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric); - periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, - defaults.redeployMaintainerInterval, defaults.periodicRedeployInterval, flagSource); - operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval); - reservationExpirer = new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric); - retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry); - inactiveExpirer = new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric); - failedExpirer = new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric); - dirtyExpirer = new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric); - provisionedExpirer = new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric); - nodeRebooter = new NodeRebooter(nodeRepository, flagSource, metric); - metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval); - infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric); - loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService(nodeRepository).map(lbService -> - new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)); - dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner -> - new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)); - spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval); - osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric); - rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval); - nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric); - autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval); - scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric); - switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer); - + PeriodicApplicationMaintainer periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval, + defaults.periodicRedeployInterval, flagSource); + InfrastructureProvisioner infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric); + maintainers.add(periodicApplicationMaintainer); + maintainers.add(infrastructureProvisioner); + + maintainers.add(new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric)); + maintainers.add(new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric)); + maintainers.add(new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval)); + maintainers.add(new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric)); + maintainers.add(new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry)); + maintainers.add(new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric)); + maintainers.add(new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric)); + maintainers.add(new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric)); + maintainers.add(new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric)); + maintainers.add(new NodeRebooter(nodeRepository, flagSource, metric)); + maintainers.add(new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval)); + maintainers.add(new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval)); + maintainers.add(new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric)); + maintainers.add(new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval)); + maintainers.add(new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric)); + maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval)); + maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric)); + maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer)); + + provisionServiceProvider.getLoadBalancerService(nodeRepository) + .map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)) + .ifPresent(maintainers::add); + provisionServiceProvider.getHostProvisioner() + .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)) + .ifPresent(maintainers::add); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); } @Override public void deconstruct() { - nodeFailer.close(); - nodeHealthTracker.close(); - periodicApplicationMaintainer.close(); - operatorChangeApplicationMaintainer.close(); - reservationExpirer.close(); - inactiveExpirer.close(); - retiredExpirer.close(); - failedExpirer.close(); - dirtyExpirer.close(); - nodeRebooter.close(); - spareCapacityMaintainer.close(); - provisionedExpirer.close(); - metricsReporter.close(); - infrastructureProvisioner.close(); - loadBalancerExpirer.ifPresent(NodeRepositoryMaintainer::close); - dynamicProvisioningMaintainer.ifPresent(NodeRepositoryMaintainer::close); - osUpgradeActivator.close(); - rebalancer.close(); - nodeMetricsDbMaintainer.close(); - autoscalingMaintainer.close(); - scalingSuggestionsMaintainer.close(); - switchRebalancer.close(); + maintainers.forEach(Maintainer::shutdown); + maintainers.forEach(Maintainer::awaitShutdown); } private static class DefaultTimes { @@ -164,7 +130,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { operatorChangeRedeployInterval = Duration.ofMinutes(3); // Vespa upgrade frequency is higher in CD so (de)activate OS upgrades more frequently as well osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5); - periodicRedeployInterval = Duration.ofMinutes(30); + periodicRedeployInterval = Duration.ofMinutes(60); provisionedExpiry = Duration.ofHours(4); rebalancerInterval = Duration.ofMinutes(120); redeployMaintainerInterval = Duration.ofMinutes(1); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index 289d5a6742a..d20b06becaf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -7,7 +7,7 @@ import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -70,7 +70,7 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { } private boolean shouldMaintain(ApplicationId id) { - BooleanFlag skipMaintenanceDeployment = Flags.SKIP_MAINTENANCE_DEPLOYMENT.bindTo(flagSource) + BooleanFlag skipMaintenanceDeployment = PermanentFlags.SKIP_MAINTENANCE_DEPLOYMENT.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, id.serializedForm()); return ! skipMaintenanceDeployment.value(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index 3546c8d8afb..7cb0270636f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.jdisc.Metric; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Applications; @@ -51,7 +52,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private int suggest(ApplicationId application, List<Node> applicationNodes) { int successes = 0; for (var cluster : nodesByCluster(applicationNodes).entrySet()) - successes += suggest(application, cluster.getKey(), cluster.getValue()) ? 1 : 0; + successes += suggest(application, cluster.getKey(), NodeList.copyOf(cluster.getValue())) ? 1 : 0; return successes; } @@ -61,7 +62,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private boolean suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, - List<Node> clusterNodes) { + NodeList clusterNodes) { Application application = applications().get(applicationId).orElse(new Application(applicationId)); Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java index b490cdf4c24..8c9e54a2ae4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java @@ -33,14 +33,13 @@ public class SwitchRebalancer extends NodeMover<Move> { @Override protected boolean maintain() { - if ( ! nodeRepository().isWorking()) return false; + if (!nodeRepository().isWorking()) return false; + if (!nodeRepository().zone().environment().isProduction()) return true; + NodeList allNodes = nodeRepository().list(); // Lockless as strong consistency is not needed + if (!zoneIsStable(allNodes)) return true; - boolean success = true; - // Using node list without holding lock as strong consistency is not needed here - NodeList allNodes = nodeRepository().list(); - if (!zoneIsStable(allNodes)) return success; findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository()); - return success; + return true; } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index e92415d6538..3c2541bac27 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -39,6 +39,13 @@ public class History { /** Returns this event if it is present in this history */ public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); } + /** Returns true if a given event is registered in this history at the given time */ + public boolean hasEventAt(Event.Type type, Instant time) { + return event(type) + .map(event -> event.at().equals(time)) + .orElse(false); + } + /** Returns true if a given event is registered in this history after the given time */ public boolean hasEventAfter(Event.Type type, Instant time) { return event(type) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 3979b898145..4b9b14656ca 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -52,6 +52,7 @@ public class ApplicationSerializer { private static final String toKey = "to"; private static final String generationKey = "generation"; private static final String atKey = "at"; + private static final String completionKey = "completion"; public static byte[] toJson(Application application) { Slime slime = new Slime(); @@ -140,13 +141,19 @@ public class ApplicationSerializer { toSlime(event.to(), object.setObject(toKey)); object.setLong(generationKey, event.generation()); object.setLong(atKey, event.at().toEpochMilli()); + event.completion().ifPresent(completion -> object.setLong(completionKey, completion.toEpochMilli())); } private static ScalingEvent scalingEventFromSlime(Inspector inspector) { return new ScalingEvent(clusterResourcesFromSlime(inspector.field(fromKey)), clusterResourcesFromSlime(inspector.field(toKey)), inspector.field(generationKey).asLong(), - Instant.ofEpochMilli(inspector.field(atKey).asLong())); + Instant.ofEpochMilli(inspector.field(atKey).asLong()), + optionalInstant(inspector.field(completionKey))); + } + + private static Optional<Instant> optionalInstant(Inspector inspector) { + return inspector.valid() ? Optional.of(Instant.ofEpochMilli(inspector.asLong())) : Optional.empty(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 42e26814d41..696853b2992 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -70,7 +70,7 @@ public class CuratorDatabaseClient { private static final Path containerImagesPath = root.append("dockerImages"); private static final Path firmwareCheckPath = root.append("firmwareCheck"); - private static final Duration defaultLockTimeout = Duration.ofMinutes(2); + private static final Duration defaultLockTimeout = Duration.ofMinutes(6); private final NodeSerializer nodeSerializer; private final CuratorDatabase db; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/JobControlFlags.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/JobControlFlags.java index 2a2a45186f9..79f4403e534 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/JobControlFlags.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/JobControlFlags.java @@ -3,8 +3,8 @@ package com.yahoo.vespa.hosted.provision.persistence; import com.yahoo.concurrent.maintenance.JobControlState; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.ListFlag; +import com.yahoo.vespa.flags.PermanentFlags; import java.util.Set; @@ -20,7 +20,7 @@ public class JobControlFlags implements JobControlState { public JobControlFlags(CuratorDatabaseClient curator, FlagSource flagSource) { this.curator = curator; - this.inactiveJobsFlag = Flags.INACTIVE_MAINTENANCE_JOBS.bindTo(flagSource); + this.inactiveJobsFlag = PermanentFlags.INACTIVE_MAINTENANCE_JOBS.bindTo(flagSource); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 3cae4a5a5ea..2b03a5cae8c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Allocation; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -60,6 +61,7 @@ class Activator { * while holding the node repository lock on this application */ private void activateNodes(Collection<HostSpec> hosts, long generation, ApplicationTransaction transaction) { + Instant activationTime = nodeRepository.clock().instant(); // Use one timestamp for all activation changes ApplicationId application = transaction.application(); Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet()); NodeList allNodes = nodeRepository.list(); @@ -69,7 +71,7 @@ class Activator { List<Node> reservedToActivate = updatePortsFrom(hosts, retainHostsInList(hostnames, reserved)); List<Node> oldActive = applicationNodes.state(Node.State.active).asList(); // All nodes active now List<Node> continuedActive = retainHostsInList(hostnames, oldActive); - List<Node> newActive = updateFrom(hosts, continuedActive); // All nodes that will be active when this is committed + List<Node> newActive = updateFrom(hosts, continuedActive, activationTime); // All nodes that will be active when this is committed newActive.addAll(reservedToActivate); if ( ! containsAll(hostnames, newActive)) throw new IllegalArgumentException("Activation of " + application + " failed. " + @@ -84,16 +86,16 @@ class Activator { List<Node> activeToRemove = removeHostsFromList(hostnames, oldActive); activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired. TODO: Move this line to deactivate - nodeRepository.deactivate(activeToRemove, transaction); + nodeRepository.deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line nodeRepository.activate(newActive, transaction.nested()); // activate also continued active to update node state - rememberResourceChange(transaction, generation, + rememberResourceChange(transaction, generation, activationTime, NodeList.copyOf(oldActive).not().retired(), NodeList.copyOf(newActive).not().retired()); unreserveParentsOf(reservedToActivate); } - private void rememberResourceChange(ApplicationTransaction transaction, long generation, + private void rememberResourceChange(ApplicationTransaction transaction, long generation, Instant at, NodeList oldNodes, NodeList newNodes) { Optional<Application> application = nodeRepository.applications().get(transaction.application()); if (application.isEmpty()) return; // infrastructure app, hopefully :-| @@ -102,15 +104,18 @@ class Activator { .collect(Collectors.groupingBy(node -> node.allocation().get().membership().cluster().id())); Application modified = application.get(); for (var clusterEntry : currentNodesByCluster.entrySet()) { + var cluster = modified.cluster(clusterEntry.getKey()).get(); var previousResources = oldNodes.cluster(clusterEntry.getKey()).toResources(); var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources(); - if ( ! previousResources.equals(currentResources)) { - modified = modified.with(application.get().cluster(clusterEntry.getKey()).get() - .with(new ScalingEvent(previousResources, - currentResources, - generation, - nodeRepository.clock().instant()))); + if ( ! previousResources.justNumbers().equals(currentResources.justNumbers())) { + cluster = cluster.with(ScalingEvent.create(previousResources, currentResources, generation, at)); } + if (cluster.targetResources().isPresent() + && cluster.targetResources().get().justNumbers().equals(currentResources.justNumbers())) { + cluster = cluster.withAutoscalingStatus("Cluster is ideally scaled within configured limits"); + } + if (cluster != modified.cluster(clusterEntry.getKey()).get()) + modified = modified.with(cluster); } if (modified != application.get()) @@ -202,11 +207,11 @@ class Activator { } /** Returns the input nodes with the changes resulting from applying the settings in hosts to the given list of nodes. */ - private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes) { + private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes, Instant at) { List<Node> updated = new ArrayList<>(); for (Node node : nodes) { HostSpec hostSpec = getHost(node.hostname(), hosts); - node = hostSpec.membership().get().retired() ? node.retire(nodeRepository.clock().instant()) : node.unretire(); + node = hostSpec.membership().get().retired() ? node.retire(at) : node.unretire(); if (! hostSpec.advertisedResources().equals(node.resources())) // A resized node node = node.with(new Flavor(hostSpec.advertisedResources())); Allocation allocation = node.allocation().get() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java index e04c1aa208d..54530297baa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java @@ -55,9 +55,9 @@ public class FlavorConfigBuilder { else if (flavorName.equals("host2")) flavorConfigBuilder.addFlavor(flavorName, 16, 24, 100, 1, Flavor.Type.BARE_METAL); else if (flavorName.equals("host3")) - flavorConfigBuilder.addFlavor(flavorName, 24, 64, 100, 1, Flavor.Type.BARE_METAL); + flavorConfigBuilder.addFlavor(flavorName, 24, 64, 100, 10, Flavor.Type.BARE_METAL); else if (flavorName.equals("host4")) - flavorConfigBuilder.addFlavor(flavorName, 48, 128, 1000, 1, Flavor.Type.BARE_METAL); + flavorConfigBuilder.addFlavor(flavorName, 48, 128, 1000, 10, Flavor.Type.BARE_METAL); else if (flavorName.equals("devhost")) flavorConfigBuilder.addFlavor(flavorName, 4., 80., 100, 10, Flavor.Type.BARE_METAL); else diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 6462fb6f19d..f02659aab5f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -122,7 +122,9 @@ public class GroupPreparer { NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requestedNodes, highestIndex, nodeRepository); NodePrioritizer prioritizer = new NodePrioritizer( - allNodes, application, cluster, requestedNodes, wantedGroups, nodeRepository); + allNodes, application, cluster, requestedNodes, wantedGroups, + nodeRepository.zone().getCloud().dynamicProvisioning(), nodeRepository.nameResolver(), + nodeRepository.resourcesCalculator(), nodeRepository.spareCount()); allocation.offer(prioritizer.collect(surplusActiveNodes)); return allocation; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 206aa077027..0b5a04ca42c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -2,10 +2,12 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.exception.LoadBalancerServiceException; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.flags.BooleanFlag; @@ -82,7 +84,8 @@ public class LoadBalancerProvisioner { try (var lock = db.lock(application)) { ClusterSpec.Id clusterId = effectiveId(cluster); List<Node> nodes = nodesOf(clusterId, application); - provision(application, clusterId, nodes, false); + LoadBalancerId loadBalancerId = requireNonClashing(new LoadBalancerId(application, clusterId)); + provision(loadBalancerId, nodes, false); } } @@ -149,9 +152,30 @@ public class LoadBalancerProvisioner { return canForwardTo; } + /** Find all load balancer IDs owned by given tenant and application */ + private List<LoadBalancerId> findLoadBalancers(TenantName tenant, ApplicationName application) { + return db.readLoadBalancerIds().stream() + .filter(id -> id.application().tenant().equals(tenant) && + id.application().application().equals(application)) + .collect(Collectors.toUnmodifiableList()); + } + + /** Require that load balancer IDs do not clash. This prevents name clashing when compacting endpoint DNS names */ + private LoadBalancerId requireNonClashing(LoadBalancerId loadBalancerId) { + List<LoadBalancerId> loadBalancerIds = findLoadBalancers(loadBalancerId.application().tenant(), + loadBalancerId.application().application()); + List<String> nonCompactableIds = withoutCompactableIds(loadBalancerId); + for (var id : loadBalancerIds) { + if (id.equals(loadBalancerId)) continue; + if (nonCompactableIds.equals(withoutCompactableIds(id))) { + throw new IllegalArgumentException(loadBalancerId + " clashes with " + id); + } + } + return loadBalancerId; + } + /** Idempotently provision a load balancer for given application and cluster */ - private void provision(ApplicationId application, ClusterSpec.Id clusterId, List<Node> nodes, boolean activate) { - var id = new LoadBalancerId(application, clusterId); + private void provision(LoadBalancerId id, List<Node> nodes, boolean activate) { var now = nodeRepository.clock().instant(); var loadBalancer = db.readLoadBalancer(id); if (loadBalancer.isEmpty() && activate) return; // Nothing to activate as this load balancer was never prepared @@ -171,6 +195,10 @@ public class LoadBalancerProvisioner { db.writeLoadBalancer(newLoadBalancer); } + private void provision(ApplicationId application, ClusterSpec.Id clusterId, List<Node> nodes, boolean activate) { + provision(new LoadBalancerId(application, clusterId), nodes, activate); + } + private LoadBalancerInstance provisionInstance(LoadBalancerId id, List<Node> nodes, boolean force) { var reals = new LinkedHashSet<Real>(); for (var node : nodes) { @@ -206,6 +234,18 @@ public class LoadBalancerProvisioner { return nodes.stream().collect(Collectors.groupingBy(node -> effectiveId(node.allocation().get().membership().cluster()))); } + /** Returns a list of the non-compactable IDs of given load balancer */ + private static List<String> withoutCompactableIds(LoadBalancerId id) { + List<String> ids = new ArrayList<>(2); + if (!"default".equals(id.cluster().value())) { + ids.add(id.cluster().value()); + } + if (!id.application().instance().isDefault()) { + ids.add(id.application().instance().value()); + } + return ids; + } + /** Find IP addresses reachable by the load balancer service */ private Set<String> reachableIpAddresses(Node node) { Set<String> reachable = new LinkedHashSet<>(node.ipConfig().primary()); @@ -225,4 +265,5 @@ public class LoadBalancerProvisioner { return cluster.combinedId().orElse(cluster.id()); } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 68e11c4c995..bc164dc37e0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -203,7 +203,7 @@ class NodeAllocation { * Such nodes will be marked retired during finalization of the list of accepted nodes. * The conditions for this are: * - * This is a content or combined node. These must always be retired before being removed to allow the cluster to + * This is a stateful node. These must always be retired before being removed to allow the cluster to * migrate away data. * * This is a container node and it is not desired due to having the wrong flavor. In this case this @@ -218,7 +218,7 @@ class NodeAllocation { if (candidate.allocation().get().membership().retired()) return true; // don't second-guess if already retired if (! requestedNodes.considerRetiring()) return false; - return cluster.type().isContent() || + return cluster.isStateful() || (cluster.type() == ClusterSpec.Type.container && !hasCompatibleFlavor(candidate)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 14937e6afeb..460b7a821e6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -8,10 +8,10 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.persistence.NameResolver; import com.yahoo.yolean.Exceptions; import java.time.Instant; @@ -25,7 +25,7 @@ import java.util.logging.Logger; * * @author smorgrav */ -abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { +public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { private static final Logger log = Logger.getLogger(NodeCandidate.class.getName()); @@ -224,8 +224,8 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { Node parent, boolean violatesSpares, LockedNodeList allNodes, - NodeRepository nodeRepository) { - return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, nodeRepository); + NameResolver nameResolver) { + return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, nameResolver); } public static NodeCandidate createNewExclusiveChild(Node node, Node parent) { @@ -316,7 +316,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { /** Needed to construct the node */ private final LockedNodeList allNodes; - private final NodeRepository nodeRepository; + private final NameResolver nameResolver; private VirtualNodeCandidate(NodeResources resources, NodeResources freeParentCapacity, @@ -324,11 +324,11 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { boolean violatesSpares, boolean exclusiveSwitch, LockedNodeList allNodes, - NodeRepository nodeRepository) { + NameResolver nameResolver) { super(freeParentCapacity, Optional.of(parent), violatesSpares, exclusiveSwitch, false, true, false); this.resources = resources; this.allNodes = allNodes; - this.nodeRepository = nodeRepository; + this.nameResolver = nameResolver; } @Override @@ -361,7 +361,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { public NodeCandidate withNode() { Optional<IP.Allocation> allocation; try { - allocation = parent.get().ipConfig().pool().findAllocation(allNodes, nodeRepository.nameResolver()); + allocation = parent.get().ipConfig().pool().findAllocation(allNodes, nameResolver); if (allocation.isEmpty()) return new InvalidNodeCandidate(resources, freeParentCapacity, parent.get(), "No addresses available on parent host"); } catch (Exception e) { @@ -382,7 +382,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { @Override public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { - return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, nodeRepository); + return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, nameResolver); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index abfd5e021c4..b88556fbfec 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.persistence.NameResolver; import java.util.ArrayList; import java.util.Collections; @@ -34,7 +35,8 @@ public class NodePrioritizer { private final NodeSpec requestedNodes; private final ApplicationId application; private final ClusterSpec clusterSpec; - private final NodeRepository nodeRepository; + private final NameResolver nameResolver; + private final boolean dynamicProvisioning; /** Whether node specification allows new nodes to be allocated. */ private final boolean canAllocateNew; private final boolean canAllocateToSpareHosts; @@ -42,19 +44,19 @@ public class NodePrioritizer { private final int currentClusterSize; private final Set<Node> spareHosts; - NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec, - int wantedGroups, NodeRepository nodeRepository) { - boolean dynamicProvisioning = nodeRepository.zone().getCloud().dynamicProvisioning(); - + public NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec, + int wantedGroups, boolean dynamicProvisioning, NameResolver nameResolver, + HostResourcesCalculator hostResourcesCalculator, int spareCount) { this.allNodes = allNodes; - this.capacity = new HostCapacity(allNodes, nodeRepository.resourcesCalculator()); + this.capacity = new HostCapacity(allNodes, hostResourcesCalculator); this.requestedNodes = nodeSpec; this.clusterSpec = clusterSpec; this.application = application; + this.dynamicProvisioning = dynamicProvisioning; this.spareHosts = dynamicProvisioning ? capacity.findSpareHostsInDynamicallyProvisionedZones(allNodes.asList()) : - capacity.findSpareHosts(allNodes.asList(), nodeRepository.spareCount()); - this.nodeRepository = nodeRepository; + capacity.findSpareHosts(allNodes.asList(), spareCount); + this.nameResolver = nameResolver; NodeList nodesInCluster = allNodes.owner(application).type(clusterSpec.type()).cluster(clusterSpec.id()); NodeList nonRetiredNodesInCluster = nodesInCluster.not().retired(); @@ -81,7 +83,7 @@ public class NodePrioritizer { } /** Collects all node candidates for this application and returns them in the most-to-least preferred order */ - List<NodeCandidate> collect(List<Node> surplusActiveNodes) { + public List<NodeCandidate> collect(List<Node> surplusActiveNodes) { addApplicationNodes(); addSurplusNodes(surplusActiveNodes); addReadyNodes(); @@ -131,7 +133,7 @@ public class NodePrioritizer { if ( !canAllocateNew) return; for (Node host : allNodes) { - if ( ! nodeRepository.canAllocateTenantNodeTo(host)) continue; + if ( ! NodeRepository.canAllocateTenantNodeTo(host, dynamicProvisioning)) continue; if (host.reservedTo().isPresent() && !host.reservedTo().get().equals(application.tenant())) continue; if (host.reservedTo().isPresent() && application.instance().isTester()) continue; if (host.exclusiveTo().isPresent()) continue; // Never allocate new nodes to exclusive hosts @@ -143,7 +145,7 @@ public class NodePrioritizer { host, spareHosts.contains(host), allNodes, - nodeRepository)); + nameResolver)); } } @@ -209,7 +211,7 @@ public class NodePrioritizer { if (node.type() != NodeType.tenant || node.parentHostname().isEmpty()) return true; Optional<Node> parent = allNodes.parentOf(node); if (parent.isEmpty()) return false; - return nodeRepository.canAllocateTenantNodeTo(parent.get()); + return NodeRepository.canAllocateTenantNodeTo(parent.get(), dynamicProvisioning); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index ede6f4ef250..a6d68243160 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -29,7 +29,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.AllocationOptimizer; import com.yahoo.vespa.hosted.provision.autoscale.Limits; import com.yahoo.vespa.hosted.provision.autoscale.ResourceTarget; -import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter; import com.yahoo.vespa.hosted.provision.node.filter.NodeHostFilter; @@ -168,7 +167,7 @@ public class NodeRepositoryProvisioner implements Provisioner { boolean firstDeployment = nodes.isEmpty(); AllocatableClusterResources currentResources = firstDeployment // start at min, preserve current resources otherwise - ? new AllocatableClusterResources(requested.minResources(), clusterSpec.type(), clusterSpec.isExclusive(), nodeRepository) + ? new AllocatableClusterResources(requested.minResources(), clusterSpec, nodeRepository) : new AllocatableClusterResources(nodes, nodeRepository, clusterSpec.isExclusive()); return within(Limits.of(requested), clusterSpec.isExclusive(), currentResources, firstDeployment); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java index 6f1334421ef..0f9babb53aa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LocksResponse.java @@ -51,26 +51,6 @@ public class LocksResponse extends HttpResponse { root.setString("hostname", hostname); root.setString("time", Instant.now().toString()); - Cursor lockPathsCursor = root.setArray("lock-paths"); - lockMetricsByPath.forEach((lockPath, lockMetrics) -> { - Cursor lockPathCursor = lockPathsCursor.addObject(); - lockPathCursor.setString("path", lockPath); - lockPathCursor.setLong("acquireCount", lockMetrics.getCumulativeAcquireCount()); - lockPathCursor.setLong("acquireFailedCount", lockMetrics.getCumulativeAcquireFailedCount()); - lockPathCursor.setLong("acquireTimedOutCount", lockMetrics.getCumulativeAcquireTimedOutCount()); - lockPathCursor.setLong("lockedCount", lockMetrics.getCumulativeAcquireSucceededCount()); - lockPathCursor.setLong("releaseCount", lockMetrics.getCumulativeReleaseCount()); - lockPathCursor.setLong("releaseFailedCount", lockMetrics.getCumulativeReleaseFailedCount()); - lockPathCursor.setLong("reentryCount", lockMetrics.getCumulativeReentryCount()); - lockPathCursor.setLong("deadlock", lockMetrics.getCumulativeDeadlockCount()); - lockPathCursor.setLong("nakedRelease", lockMetrics.getCumulativeNakedReleaseCount()); - lockPathCursor.setLong("acquireWithoutRelease", lockMetrics.getCumulativeAcquireWithoutReleaseCount()); - lockPathCursor.setLong("foreignRelease", lockMetrics.getCumulativeForeignReleaseCount()); - - setLatency(lockPathCursor, "acquire", lockMetrics.getAcquireLatencyMetrics()); - setLatency(lockPathCursor, "locked", lockMetrics.getLockedLatencyMetrics()); - }); - Cursor threadsCursor = root.setArray("threads"); for (var threadLockStats : threadLockStatsList) { Optional<LockAttempt> ongoingLockAttempt = threadLockStats.getTopMostOngoingLockAttempt(); @@ -102,18 +82,45 @@ public class LocksResponse extends HttpResponse { Cursor recordingsCursor = root.setArray("recordings"); historicRecordings.forEach(recording -> setRecording(recordingsCursor.addObject(), recording)); } + + Cursor lockPathsCursor = root.setArray("lock-paths"); + lockMetricsByPath.forEach((lockPath, lockMetrics) -> { + Cursor lockPathCursor = lockPathsCursor.addObject(); + lockPathCursor.setString("path", lockPath); + setNonZeroLong(lockPathCursor, "acquireCount", lockMetrics.getCumulativeAcquireCount()); + setNonZeroLong(lockPathCursor, "acquireFailedCount", lockMetrics.getCumulativeAcquireFailedCount()); + setNonZeroLong(lockPathCursor, "acquireTimedOutCount", lockMetrics.getCumulativeAcquireTimedOutCount()); + setNonZeroLong(lockPathCursor, "lockedCount", lockMetrics.getCumulativeAcquireSucceededCount()); + setNonZeroLong(lockPathCursor, "releaseCount", lockMetrics.getCumulativeReleaseCount()); + setNonZeroLong(lockPathCursor, "releaseFailedCount", lockMetrics.getCumulativeReleaseFailedCount()); + setNonZeroLong(lockPathCursor, "reentryCount", lockMetrics.getCumulativeReentryCount()); + setNonZeroLong(lockPathCursor, "deadlock", lockMetrics.getCumulativeDeadlockCount()); + setNonZeroLong(lockPathCursor, "nakedRelease", lockMetrics.getCumulativeNakedReleaseCount()); + setNonZeroLong(lockPathCursor, "acquireWithoutRelease", lockMetrics.getCumulativeAcquireWithoutReleaseCount()); + setNonZeroLong(lockPathCursor, "foreignRelease", lockMetrics.getCumulativeForeignReleaseCount()); + + setLatency(lockPathCursor, "acquire", lockMetrics.getAcquireLatencyMetrics()); + setLatency(lockPathCursor, "locked", lockMetrics.getLockedLatencyMetrics()); + }); + } + + private static void setNonZeroLong(Cursor cursor, String fieldName, long value) { + if (value != 0) { + cursor.setLong(fieldName, value); + } } private static void setLatency(Cursor cursor, String name, LatencyMetrics latencyMetrics) { - cursor.setDouble(name + "Latency", latencyMetrics.latencySeconds()); - cursor.setDouble(name + "MaxActiveLatency", latencyMetrics.maxActiveLatencySeconds()); - cursor.setDouble(name + "Hz", latencyMetrics.endHz()); - cursor.setDouble(name + "Load", latencyMetrics.load()); + setNonZeroDouble(cursor, name + "Latency", latencyMetrics.latencySeconds()); + setNonZeroDouble(cursor, name + "MaxActiveLatency", latencyMetrics.maxActiveLatencySeconds()); + setNonZeroDouble(cursor, name + "Hz", latencyMetrics.endHz()); + setNonZeroDouble(cursor, name + "Load", latencyMetrics.load()); } - private static double roundDouble(double value, int decimalPlaces) { - double factor = Math.pow(10, decimalPlaces); - return Math.round(value * factor) / factor; + private static void setNonZeroDouble(Cursor cursor, String fieldName, double value) { + if (Double.compare(value, 0.0) != 0) { + cursor.setDouble(fieldName, value); + } } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java index 9332eb79f20..0d423333ce1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java @@ -59,7 +59,7 @@ public class AutoscalingIntegrationTest { tester.nodeRepository().applications().put(application, lock); } var scaledResources = autoscaler.suggest(application.clusters().get(cluster1.id()), - tester.nodeRepository().getNodes(application1)); + tester.nodeRepository().list(application1)); assertTrue(scaledResources.isPresent()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 5393aa7cfb8..a217d97ac27 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -52,7 +52,7 @@ public class AutoscalingTest { assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.clock().advance(Duration.ofDays(1)); - tester.addCpuMeasurements(0.25f, 1f, 60, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, tester.autoscale(application1, cluster1.id(), min, max).target()); @@ -62,7 +62,7 @@ public class AutoscalingTest { tester.deactivateRetired(application1, cluster1, scaledResources); - tester.clock().advance(Duration.ofDays(1)); + tester.clock().advance(Duration.ofDays(2)); tester.addCpuMeasurements(0.8f, 1f, 3, application1); assertTrue("Load change is large, but insufficient measurements for new config -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); @@ -74,6 +74,8 @@ public class AutoscalingTest { tester.assertResources("Scaling down to minimum since usage has gone down significantly", 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max).target()); + + var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); } /** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */ @@ -117,7 +119,7 @@ public class AutoscalingTest { tester.nodeRepository().getNodes(application1).stream() .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow); - tester.clock().advance(Duration.ofDays(1)); + tester.clock().advance(Duration.ofDays(2)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); // Changing min and max from slow to any ClusterResources min = new ClusterResources( 2, 1, @@ -225,6 +227,40 @@ public class AutoscalingTest { } @Test + public void not_using_out_of_service_measurements() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources(2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(5, 1, new NodeResources(100, 1000, 1000, 1)); + AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 2, 1, resources); + tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, false, true, 120, application1); + assertTrue("Not scaling up since nodes were measured while cluster was unstable", + tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); + } + + @Test + public void not_using_unstable_measurements() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources(2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(5, 1, new NodeResources(100, 1000, 1000, 1)); + AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 2, 1, resources); + tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, true, false, 120, application1); + assertTrue("Not scaling up since nodes were measured while cluster was unstable", + tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); + } + + @Test public void test_autoscaling_group_size_1() { NodeResources resources = new NodeResources(3, 100, 100, 1); ClusterResources min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1)); @@ -272,6 +308,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 2, new NodeResources(10, 100, 100, 1)); + tester.clock().advance(Duration.ofDays(1)); tester.addMemMeasurements(1.0f, 1f, 1000, application1); tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.9, 89.3, 62.5, @@ -290,7 +327,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2)); - tester.clock().advance(Duration.ofDays(1)); + tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, @@ -313,8 +350,8 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); assertTrue(tester.autoscale(application1, cluster1.id(), min, max).target().isEmpty()); - // Trying the same a day later causes autoscaling - tester.clock().advance(Duration.ofDays(1)); + // Trying the same later causes autoscaling + tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, @@ -376,7 +413,7 @@ public class AutoscalingTest { // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); - tester.clock().advance(Duration.ofDays(1)); + tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, @@ -385,6 +422,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); + tester.clock().advance(Duration.ofDays(2)); tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 3faa4c244ee..43302c4fe23 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -138,6 +138,7 @@ class AutoscalingTester { memory, disk, 0, + true, true)))); } } @@ -168,12 +169,18 @@ class AutoscalingTester { memory, disk, 0, + true, true)))); } } } - public void addMeasurements(float cpu, float memory, float disk, int generation, int count, ApplicationId applicationId) { + public void addMeasurements(float cpu, float memory, float disk, int generation, int count, ApplicationId applicationId) { + addMeasurements(cpu, memory, disk, generation, true, true, count, applicationId); + } + + public void addMeasurements(float cpu, float memory, float disk, int generation, boolean inService, boolean stable, + int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); @@ -183,7 +190,8 @@ class AutoscalingTester { memory, disk, generation, - true)))); + inService, + stable)))); } } } @@ -196,7 +204,7 @@ class AutoscalingTester { nodeRepository().applications().put(application, lock); } return autoscaler.autoscale(application.clusters().get(clusterId), - nodeRepository().getNodes(applicationId, Node.State.active)); + nodeRepository().list(applicationId, Node.State.active)); } public Autoscaler.Advice suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, @@ -207,7 +215,7 @@ class AutoscalingTester { nodeRepository().applications().put(application, lock); } return autoscaler.suggest(application.clusters().get(clusterId), - nodeRepository().getNodes(applicationId, Node.State.active)); + nodeRepository().list(applicationId, Node.State.active)); } public ClusterResources assertResources(String message, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java index dd991f15087..0a62e5b6a68 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java @@ -17,6 +17,7 @@ import java.util.ArrayList; import java.util.List; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class MetricsV2MetricsFetcherTest { @@ -70,14 +71,17 @@ public class MetricsV2MetricsFetcherTest { assertEquals(0.15, values.get(0).getSecond().memory(), delta); assertEquals(0.20, values.get(0).getSecond().disk(), delta); assertEquals(3, values.get(0).getSecond().generation(), delta); + assertTrue(values.get(0).getSecond().stable()); } { + httpClient.cannedResponse = cannedResponseForApplication2; try (Mutex lock = tester.nodeRepository().lock(application1)) { - tester.nodeRepository().write(tester.nodeRepository().getNodes(application1, Node.State.active) + tester.nodeRepository().write(tester.nodeRepository().getNodes(application2, Node.State.active) .get(0).retire(tester.clock().instant()), lock); } - assertTrue("No metrics fetching while unstable", fetcher.fetchMetrics(application1).isEmpty()); + List<Pair<String, MetricSnapshot>> values = new ArrayList<>(fetcher.fetchMetrics(application2)); + assertFalse(values.get(0).getSecond().stable()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java index dba19d675dd..48c90125ac8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java @@ -40,7 +40,7 @@ public class NodeMetricsDbTest { MetricsDb db = MetricsDb.createTestInstance(tester.nodeRepository()); Collection<Pair<String, MetricSnapshot>> values = new ArrayList<>(); for (int i = 0; i < 40; i++) { - values.add(new Pair<>(node0, new MetricSnapshot(clock.instant(), 0.9f, 0.6f, 0.6f, 0, true))); + values.add(new Pair<>(node0, new MetricSnapshot(clock.instant(), 0.9f, 0.6f, 0.6f, 0, true, false))); clock.advance(Duration.ofMinutes(120)); } db.add(values); @@ -50,7 +50,7 @@ public class NodeMetricsDbTest { assertEquals(35, measurementCount(db.getNodeTimeseries(clock.instant().minus(Duration.ofHours(72)), Set.of(node0)))); db.gc(); - assertEquals( 5, measurementCount(db.getNodeTimeseries(clock.instant().minus(Duration.ofHours(72)), Set.of(node0)))); + assertEquals(23, measurementCount(db.getNodeTimeseries(clock.instant().minus(Duration.ofHours(72)), Set.of(node0)))); } private int measurementCount(List<NodeTimeseries> measurements) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java index b97d5136485..1f61faed735 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; import com.yahoo.io.IOUtils; import com.yahoo.test.ManualClock; +import org.junit.Ignore; import org.junit.Test; import java.io.File; @@ -16,6 +17,7 @@ import java.util.Set; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; /** * Tests the Quest metrics db. @@ -34,6 +36,7 @@ public class QuestMetricsDbTest { ManualClock clock = new ManualClock("2020-10-01T00:00:00"); QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); Instant startTime = clock.instant(); + clock.advance(Duration.ofSeconds(1)); db.add(timeseries(1000, Duration.ofSeconds(1), clock, "host1", "host2", "host3")); @@ -106,9 +109,56 @@ public class QuestMetricsDbTest { assertEquals(24 * 10, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); db.gc(); - assertEquals(24 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + assertEquals(48 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); db.gc(); // no-op - assertEquals(24 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + assertEquals(48 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + } + + /** To manually test that we can read existing data */ + @Ignore + @Test + public void testReadingAndAppendingToExistingData() { + String dataDir = "data/QuestMetricsDbExistingData"; + if ( ! new File(dataDir).exists()) { + System.out.println("No existing data to check"); + return; + } + IOUtils.createDirectory(dataDir + "/metrics"); + ManualClock clock = new ManualClock("2020-10-01T00:00:00"); + clock.advance(Duration.ofSeconds(9)); // Adjust to last data written + QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); + + List<NodeTimeseries> timeseries = db.getNodeTimeseries(clock.instant().minus(Duration.ofSeconds(9)), Set.of("host1")); + assertFalse("Could read existing data", timeseries.isEmpty()); + assertEquals(10, timeseries.get(0).size()); + + System.out.println("Existing data read:"); + for (var snapshot : timeseries.get(0).asList()) + System.out.println(" " + snapshot); + + clock.advance(Duration.ofSeconds(1)); + db.add(timeseries(2, Duration.ofSeconds(1), clock, "host1")); + System.out.println("New data written and read:"); + timeseries = db.getNodeTimeseries(clock.instant().minus(Duration.ofSeconds(2)), Set.of("host1")); + for (var snapshot : timeseries.get(0).asList()) + System.out.println(" " + snapshot); + } + + /** To update data for the manual test above */ + @Ignore + @Test + public void updateExistingData() { + String dataDir = "data/QuestMetricsDbExistingData"; + IOUtils.recursiveDeleteDir(new File(dataDir)); + IOUtils.createDirectory(dataDir + "/metrics"); + ManualClock clock = new ManualClock("2020-10-01T00:00:00"); + QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); + Instant startTime = clock.instant(); + db.add(timeseries(10, Duration.ofSeconds(1), clock, "host1")); + + int added = db.getNodeTimeseries(startTime, Set.of("host1")).get(0).asList().size(); + System.out.println("Added " + added + " rows of data"); + db.close(); } private Collection<Pair<String, MetricSnapshot>> timeseries(int countPerHost, Duration sampleRate, ManualClock clock, @@ -121,6 +171,7 @@ public class QuestMetricsDbTest { i * 0.2, i * 0.4, i % 100, + true, true))); clock.advance(sampleRate); } @@ -136,8 +187,10 @@ public class QuestMetricsDbTest { i * 0.2, i * 0.4, i % 100, - true))); + true, + false))); } return timeseries; } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 4b14174488e..0c1a59c883d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -6,6 +6,8 @@ import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import org.junit.Test; @@ -13,9 +15,10 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.List; - +import java.util.Optional; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; /** @@ -27,7 +30,7 @@ import static org.junit.Assert.assertTrue; public class AutoscalingMaintainerTest { @Test - public void testAutoscalingMaintainer() { + public void test_autoscaling_maintainer() { ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); @@ -53,6 +56,7 @@ public class AutoscalingMaintainerTest { new ClusterResources(10, 1, new NodeResources(6.5, 9, 20, 0.1)), false, true)); + tester.clock().advance(Duration.ofMinutes(10)); tester.maintainer().maintain(); // noop assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); assertTrue(tester.deployer().lastDeployTime(app2).isEmpty()); @@ -60,6 +64,7 @@ public class AutoscalingMaintainerTest { tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app2); + tester.clock().advance(Duration.ofMinutes(10)); tester.maintainer().maintain(); assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); // since autoscaling is off assertTrue(tester.deployer().lastDeployTime(app2).isPresent()); @@ -84,16 +89,18 @@ public class AutoscalingMaintainerTest { // Causes autoscaling tester.clock().advance(Duration.ofSeconds(1)); + tester.clock().advance(Duration.ofMinutes(10)); Instant firstMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); assertTrue(tester.deployer().lastDeployTime(app1).isPresent()); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); - assertEquals(1, events.size()); - assertEquals(2, events.get(0).from().nodes()); - assertEquals(4, events.get(0).to().nodes()); - assertEquals(1, events.get(0).generation()); - assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(0).at().toEpochMilli()); + assertEquals(2, events.size()); + assertEquals(Optional.of(firstMaintenanceTime), events.get(0).completion()); + assertEquals(2, events.get(1).from().nodes()); + assertEquals(4, events.get(1).to().nodes()); + assertEquals(1, events.get(1).generation()); + assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli()); // Measure overload still, since change is not applied, but metrics are discarded tester.clock().advance(Duration.ofSeconds(1)); @@ -110,13 +117,18 @@ public class AutoscalingMaintainerTest { assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // Add measurement of the expected generation, leading to rescaling - tester.clock().advance(Duration.ofHours(2)); + // - record scaling completion + tester.clock().advance(Duration.ofMinutes(5)); + tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 1, app1); + tester.maintainer().maintain(); + // - measure underload + tester.clock().advance(Duration.ofHours(1)); tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1); Instant lastMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); - assertEquals(2, events.get(0).generation()); + assertEquals(2, events.get(2).generation()); } @Test @@ -128,4 +140,90 @@ public class AutoscalingMaintainerTest { AutoscalingMaintainer.toString(new ClusterResources(4, 2, new NodeResources(1, 2, 4, 1)))); } + @Test + public void test_scaling_event_recording() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + NodeResources lowResources = new NodeResources(4, 4, 10, 0.1); + NodeResources highResources = new NodeResources(8, 8, 20, 0.1); + Capacity app1Capacity = Capacity.from(new ClusterResources(2, 1, lowResources), + new ClusterResources(4, 2, highResources)); + var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity)); + + // deploy + tester.deploy(app1, cluster1, app1Capacity); + + for (int i = 0; i < 20; i++) { + // Record completion to keep scaling window at minimum + tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 1, app1); + tester.maintainer().maintain(); + + tester.clock().advance(Duration.ofDays(1)); + + if (i % 2 == 0) // high load + tester.addMeasurements(0.9f, 0.9f, 0.9f, i, 200, app1); + else // low load + tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 200, app1); + tester.maintainer().maintain(); + } + + assertEquals(Cluster.maxScalingEvents, tester.cluster(app1, cluster1).scalingEvents().size()); + } + + @Test + public void test_autoscaling_window() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + NodeResources lowResources = new NodeResources(4, 4, 10, 0.1); + NodeResources highResources = new NodeResources(8, 8, 20, 0.1); + Capacity app1Capacity = Capacity.from(new ClusterResources(2, 1, lowResources), + new ClusterResources(4, 2, highResources)); + var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity)); + ManualClock clock = tester.clock(); + + // deploy + tester.deploy(app1, cluster1, app1Capacity); + + autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester); + autoscale( true, Duration.ofMinutes(19), Duration.ofMinutes(10), clock, app1, cluster1, tester); + autoscale( true, Duration.ofMinutes(40), Duration.ofMinutes(20), clock, app1, cluster1, tester); + } + + private void autoscale(boolean down, Duration completionTime, Duration expectedWindow, + ManualClock clock, ApplicationId application, ClusterSpec cluster, + AutoscalingMaintainerTester tester) { + long generation = tester.cluster(application, cluster).lastScalingEvent().get().generation(); + tester.maintainer().maintain(); + assertFalse("Not measured to be on the last generation yet", + tester.cluster(application, cluster).lastScalingEvent().get().completion().isPresent()); + + clock.advance(completionTime); + float load = down ? 0.1f : 1.0f; + tester.addMeasurements(load, load, load, generation, 200, application); + tester.maintainer().maintain(); + assertEvent("Measured completion of the last scaling event, but no new autoscaling yet", + generation, Optional.of(clock.instant()), + tester.cluster(application, cluster).lastScalingEvent().get()); + if (down) + clock.advance(expectedWindow.minus(completionTime).plus(expectedWindow.multipliedBy(2))); + else + clock.advance(expectedWindow.minus(completionTime)); + + tester.addMeasurements(load, load, load, generation, 200, application); + tester.maintainer().maintain(); + assertEquals("We passed window duration so a new autoscaling is started: " + + tester.cluster(application, cluster).autoscalingStatus(), + generation + 1, + tester.cluster(application, cluster).lastScalingEvent().get().generation()); + } + + private void assertEvent(String explanation, + long expectedGeneration, Optional<Instant> expectedCompletion, ScalingEvent event) { + assertEquals(explanation + ". Generation", expectedGeneration, event.generation()); + assertEquals(explanation + ". Generation " + expectedGeneration + ". Completion", + expectedCompletion, event.completion()); + } + + + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index fadcd40ad0a..f53ae1baad2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -14,6 +14,8 @@ import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; @@ -68,7 +70,7 @@ public class AutoscalingMaintainerTester { return provisioningTester.deploy(application, cluster, capacity); } - public void addMeasurements(float cpu, float mem, float disk, int generation, int count, ApplicationId applicationId) { + public void addMeasurements(float cpu, float mem, float disk, long generation, int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { for (Node node : nodes) @@ -77,10 +79,15 @@ public class AutoscalingMaintainerTester { mem, disk, generation, + true, true)))); } } + public Cluster cluster(ApplicationId application, ClusterSpec cluster) { + return nodeRepository().applications().get(application).get().cluster(cluster.id()).get(); + } + private FlavorsConfig flavorsConfig() { FlavorConfigBuilder b = new FlavorConfigBuilder(); b.addFlavor("flt", 30, 30, 40, 3, Flavor.Type.BARE_METAL); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java index 86507508e68..5dfd1193581 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java @@ -226,7 +226,7 @@ public class CapacityCheckerTester { return m; } public String toString() { - return String.format("%s/%s/%s/%d%s", clustertype, clusterid, group, index, retired ? "/retired" : ""); + return String.format("%s/%s/%s/%d%s%s", clustertype, clusterid, group, index, retired ? "/retired" : "", clustertype.isContent() ? "/stateful" : ""); } } static class OwnerModel { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 2833c4e11ba..c67f6657c7f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -15,9 +15,10 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; -import com.yahoo.vespa.flags.custom.HostCapacity; +import com.yahoo.vespa.flags.PermanentFlags; +import com.yahoo.vespa.flags.custom.ClusterCapacity; +import com.yahoo.vespa.flags.custom.SharedHost; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Address; @@ -31,7 +32,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; -import org.junit.Ignore; import org.junit.Test; import java.time.Duration; @@ -43,6 +43,7 @@ import java.util.Optional; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; import static com.yahoo.vespa.hosted.provision.maintenance.DynamicProvisioningMaintainerTest.MockHostProvisioner.Behaviour; @@ -104,7 +105,7 @@ public class DynamicProvisioningMaintainerTest { @Test public void does_not_deprovision_when_preprovisioning_enabled() { var tester = new DynamicProvisioningTester().addInitialNodes(); - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(1, 3, 2, 1)), HostCapacity.class); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1, 3, 2, 1.0)), ClusterCapacity.class); Optional<Node> failedHost = tester.nodeRepository.getNode("host2"); assertTrue(failedHost.isPresent()); @@ -116,23 +117,162 @@ public class DynamicProvisioningMaintainerTest { @Test public void provision_deficit_and_deprovision_excess() { var tester = new DynamicProvisioningTester().addInitialNodes(); - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), - List.of(new HostCapacity(24, 64, 100, 2), - new HostCapacity(16, 24, 100, 1)), - HostCapacity.class); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, 48, 128, 1000, 10.0), + new ClusterCapacity(1, 16, 24, 100, 1.0)), + ClusterCapacity.class); + + assertEquals(0, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(11, tester.nodeRepository.getNodes().size()); assertTrue(tester.nodeRepository.getNode("host2").isPresent()); - assertEquals(0 ,tester.hostProvisioner.provisionedHosts.size()); + assertTrue(tester.nodeRepository.getNode("host2-1").isPresent()); + assertTrue(tester.nodeRepository.getNode("host3").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname100").isEmpty()); + assertTrue(tester.nodeRepository.getNode("hostname101").isEmpty()); - // failed host2 is removed - Optional<Node> failedHost = tester.nodeRepository.getNode("host2"); - assertTrue(failedHost.isPresent()); tester.maintainer.maintain(); - assertTrue("Failed host is deprovisioned", tester.nodeRepository.getNode(failedHost.get().hostname()).isEmpty()); - assertTrue("Host with matching resources is kept", tester.nodeRepository.getNode("host3").isPresent()); - // Two more hosts are provisioned with expected resources - NodeResources resources = new NodeResources(24, 64, 100, 1); - assertEquals(2, tester.provisionedHostsMatching(resources)); + assertEquals(2, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); + List<Node> nodesAfter = tester.nodeRepository.getNodes(); + assertEquals(11, nodesAfter.size()); // 2 removed, 2 added + assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.getNode("host2").isEmpty()); + assertTrue("Node on deprovisioned host removed", tester.nodeRepository.getNode("host2-1").isEmpty()); + assertTrue("Host satisfying 16-24-100-1 is kept", tester.nodeRepository.getNode("host3").isPresent()); + assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname100").isPresent()); + assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname101").isPresent()); + } + + @Test + public void preprovision_with_shared_host() { + var tester = new DynamicProvisioningTester().addInitialNodes(); + // Makes provisioned hosts 48-128-1000-10 + tester.hostProvisioner.provisionSharedHost("host4"); + + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, 1, 30, 20, 3.0)), + ClusterCapacity.class); + + assertEquals(0, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(11, tester.nodeRepository.getNodes().size()); + assertTrue(tester.nodeRepository.getNode("host2").isPresent()); + assertTrue(tester.nodeRepository.getNode("host2-1").isPresent()); + assertTrue(tester.nodeRepository.getNode("host3").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname100").isEmpty()); + + // The first cluster will be allocated to host3 and a new host hostname100. + // hostname100 will be a large shared host specified above. + tester.maintainer.maintain(); + verifyFirstMaintain(tester); + + // Second maintain should be a no-op, otherwise we did wrong in the first maintain. + tester.maintainer.maintain(); + verifyFirstMaintain(tester); + + // Add a second cluster equal to the first. It should fit on existing host3 and hostname100. + + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, 1, 30, 20, 3.0), + new ClusterCapacity(2, 1, 30, 20, 3.0)), + ClusterCapacity.class); + + tester.maintainer.maintain(); + verifyFirstMaintain(tester); + + // Change second cluster such that it doesn't fit on host3, but does on hostname100, + // and with a size of 2 it should allocate a new shared host. + // The node allocation code prefers to allocate to the shared hosts instead of host3 (at least + // in this test, due to skew), so host3 will be deprovisioned when hostname101 is provisioned. + // host3 is a 24-64-100-10 while hostname100 is 48-128-1000-10. + + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, 1, 30, 20, 3.0), + new ClusterCapacity(2, 24, 64, 100, 1.0)), + ClusterCapacity.class); + + tester.maintainer.maintain(); + + assertEquals(2, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); + assertEquals(10, tester.nodeRepository.getNodes().size()); // 3 removed, 2 added + assertTrue("preprovision capacity is prefered on shared hosts", tester.nodeRepository.getNode("host3").isEmpty()); + assertTrue(tester.nodeRepository.getNode("hostname100").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname101").isPresent()); + + // If the preprovision capacity is reduced, we should see shared hosts deprovisioned. + + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(1, 1, 30, 20, 3.0)), + ClusterCapacity.class); + + tester.maintainer.maintain(); + + assertEquals("one provisioned host has been deprovisioned, so there are 2 -> 1 provisioned hosts", + 1, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); + assertEquals(9, tester.nodeRepository.getNodes().size()); // 4 removed, 2 added + if (tester.nodeRepository.getNode("hostname100").isPresent()) { + assertTrue("hostname101 is superfluous and should have been deprovisioned", + tester.nodeRepository.getNode("hostname101").isEmpty()); + } else { + assertTrue("hostname101 is required for preprovision capacity", + tester.nodeRepository.getNode("hostname101").isPresent()); + } + + } + + private void verifyFirstMaintain(DynamicProvisioningTester tester) { + assertEquals(1, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); + assertEquals(10, tester.nodeRepository.getNodes().size()); // 2 removed, 1 added + assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.getNode("host2").isEmpty()); + assertTrue("Node on deprovisioned host removed", tester.nodeRepository.getNode("host2-1").isEmpty()); + assertTrue("One 1-30-20-3 node fits on host3", tester.nodeRepository.getNode("host3").isPresent()); + assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname100").isPresent()); + } + + @Test + public void verify_min_count_of_shared_hosts() { + // What's going on here? We are trying to verify the impact of varying the minimum number of + // shared hosts (SharedHost.minCount()). + // + // addInitialNodes() adds 4 tenant hosts: + // host1 shared !removable # not removable because it got child nodes w/allocation + // host2 !shared removable # not counted as a shared host because it is failed + // host3 shared removable + // host4 shared !removable # not removable because it got child nodes w/allocation + // + // Hosts 1, 3, and 4 count as "shared hosts" with respect to the minCount lower boundary. + // Hosts 3 and 4 are removable, that is they will be deprovisioned as excess hosts unless + // prevented by minCount. + + // minCount=0: All (2) removable hosts are deprovisioned + assertWithMinCount(0, 0, 2); + // minCount=1: The same thing happens, because there are 2 shared hosts left + assertWithMinCount(1, 0, 2); + assertWithMinCount(2, 0, 2); + // minCount=3: since we require 3 shared hosts, host3 is not deprovisioned. + assertWithMinCount(3, 0, 1); + // 4 shared hosts require we provision 1 shared host + assertWithMinCount(4, 1, 1); + // 5 shared hosts require we provision 2 shared hosts + assertWithMinCount(5, 2, 1); + assertWithMinCount(6, 3, 1); + } + + private void assertWithMinCount(int minCount, int provisionCount, int deprovisionCount) { + var tester = new DynamicProvisioningTester().addInitialNodes(); + tester.hostProvisioner.provisionSharedHost("host4"); + + tester.flagSource.withJacksonFlag(PermanentFlags.SHARED_HOST.id(), new SharedHost(null, minCount), SharedHost.class); + tester.maintainer.maintain(); + assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts); + + // Verify next maintain is a no-op + tester.maintainer.maintain(); + assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts); } @Test @@ -145,58 +285,95 @@ public class DynamicProvisioningMaintainerTest { assertTrue(tester.nodeRepository.getNode(host2.hostname()).isPresent()); } - @Ignore // TODO (hakon): Enable as test of min-capacity specified in flag @Test - public void provision_exact_capacity() { - var tester = new DynamicProvisioningTester(Cloud.builder().dynamicProvisioning(true).build()); - NodeResources resources1 = new NodeResources(24, 64, 100, 1); - NodeResources resources2 = new NodeResources(16, 24, 100, 1); - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1), - new HostCapacity(resources2.vcpu(), resources2.memoryGb(), resources2.diskGb(), 2)), - HostCapacity.class); + public void test_minimum_capacity() { + var tester = new DynamicProvisioningTester(); + NodeResources resources1 = new NodeResources(24, 64, 100, 10); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), resources1.bandwidthGbps())), + ClusterCapacity.class); tester.maintainer.maintain(); // Hosts are provisioned - assertEquals(1, tester.provisionedHostsMatching(resources1)); - assertEquals(2, tester.provisionedHostsMatching(resources2)); + assertEquals(2, tester.provisionedHostsMatching(resources1)); + assertEquals(0, tester.hostProvisioner.deprovisionedHosts); // Next maintenance run does nothing tester.assertNodesUnchanged(); - // Target capacity is changed - NodeResources resources3 = new NodeResources(48, 128, 1000, 1); - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1), - new HostCapacity(resources3.vcpu(), resources3.memoryGb(), resources3.diskGb(), 1)), - HostCapacity.class); + // Pretend shared-host flag has been set to host4's flavor + var sharedHostNodeResources = new NodeResources(48, 128, 1000, 10, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote); + tester.hostProvisioner.provisionSharedHost("host4"); - // Excess hosts are deprovisioned - tester.maintainer.maintain(); - assertEquals(1, tester.provisionedHostsMatching(resources1)); - assertEquals(0, tester.provisionedHostsMatching(resources2)); - assertEquals(1, tester.provisionedHostsMatching(resources3)); - assertEquals(2, tester.nodeRepository.getNodes(Node.State.deprovisioned).size()); + // Next maintenance run does nothing + tester.assertNodesUnchanged(); + + // Must be able to allocate 2 nodes with "no resource requirement" + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(2, 0, 0, 0, 0.0)), + ClusterCapacity.class); + + // Next maintenance run does nothing + tester.assertNodesUnchanged(); // Activate hosts - tester.maintainer.maintain(); // Resume provisioning of new hosts List<Node> provisioned = tester.nodeRepository.list().state(Node.State.provisioned).asList(); tester.nodeRepository.setReady(provisioned, Agent.system, this.getClass().getSimpleName()); tester.provisioningTester.activateTenantHosts(); // Allocating nodes to a host does not result in provisioning of additional capacity ApplicationId application = ProvisioningTester.applicationId(); + NodeResources applicationNodeResources = new NodeResources(4, 8, 50, 0.1); tester.provisioningTester.deploy(application, - Capacity.from(new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1)))); + Capacity.from(new ClusterResources(2, 1, applicationNodeResources))); assertEquals(2, tester.nodeRepository.list().owner(application).size()); tester.assertNodesUnchanged(); // Clearing flag does nothing - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(), HostCapacity.class); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(), ClusterCapacity.class); + tester.assertNodesUnchanged(); + + // Increasing the capacity provisions additional hosts + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(3, 0, 0, 0, 0.0)), + ClusterCapacity.class); + assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources)); + assertTrue(tester.nodeRepository.getNode("hostname102").isEmpty()); + tester.maintainer.maintain(); + assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources)); + assertTrue(tester.nodeRepository.getNode("hostname102").isPresent()); + + // Next maintenance run does nothing tester.assertNodesUnchanged(); - // Capacity reduction does not remove host with children - tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1)), - HostCapacity.class); + // Requiring >0 capacity does nothing as long as it fits on the 3 hosts + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(3, + resources1.vcpu() - applicationNodeResources.vcpu(), + resources1.memoryGb() - applicationNodeResources.memoryGb(), + resources1.diskGb() - applicationNodeResources.diskGb(), + resources1.bandwidthGbps() - applicationNodeResources.bandwidthGbps())), + ClusterCapacity.class); tester.assertNodesUnchanged(); + + // But requiring a bit more in the cluster => provisioning of 2 shared hosts. + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), + List.of(new ClusterCapacity(3, + resources1.vcpu() - applicationNodeResources.vcpu() + 1, + resources1.memoryGb() - applicationNodeResources.memoryGb() + 1, + resources1.diskGb() - applicationNodeResources.diskGb() + 1, + resources1.bandwidthGbps())), + ClusterCapacity.class); + + assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources)); + assertTrue(tester.nodeRepository.getNode("hostname102").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname103").isEmpty()); + assertTrue(tester.nodeRepository.getNode("hostname104").isEmpty()); + tester.maintainer.maintain(); + assertEquals(3, tester.provisionedHostsMatching(sharedHostNodeResources)); + assertTrue(tester.nodeRepository.getNode("hostname102").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname103").isPresent()); + assertTrue(tester.nodeRepository.getNode("hostname104").isPresent()); } @Test @@ -225,9 +402,7 @@ public class DynamicProvisioningMaintainerTest { private static final ApplicationId proxyApp = ApplicationId.from("vespa", "proxy", "default"); private static final NodeFlavors flavors = FlavorConfigBuilder.createDummies("default", "docker", "host2", "host3", "host4"); - private final InMemoryFlagSource flagSource = new InMemoryFlagSource().withListFlag(Flags.TARGET_CAPACITY.id(), - List.of(), - HostCapacity.class); + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); private final NodeRepository nodeRepository; private final MockHostProvisioner hostProvisioner; @@ -260,9 +435,10 @@ public class DynamicProvisioningMaintainerTest { List.of(createNode("host1", Optional.empty(), NodeType.host, Node.State.active, Optional.of(tenantHostApp)), createNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)), createNode("host1-2", Optional.of("host1"), NodeType.tenant, Node.State.failed, Optional.empty()), - createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, Optional.of(tenantApp)), + createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, Optional.of(tenantHostApp)), createNode("host2-1", Optional.of("host2"), NodeType.tenant, Node.State.failed, Optional.empty()), - createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty()), + createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty(), + "host3-1", "host3-2", "host3-3", "host3-4", "host3-5"), createNode("host4", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty()), createNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)), createNode("proxyhost1", Optional.empty(), NodeType.proxyhost, Node.State.provisioned, Optional.empty()), @@ -281,8 +457,9 @@ public class DynamicProvisioningMaintainerTest { return nodeRepository.database().addNodesInState(List.of(node), node.state(), Agent.system).get(0); } - private Node createNode(String hostname, Optional<String> parentHostname, NodeType nodeType, Node.State state, Optional<ApplicationId> application) { - Flavor flavor = nodeRepository.flavors().getFlavor(parentHostname.isPresent() ? "docker" : "host2").orElseThrow(); + private Node createNode(String hostname, Optional<String> parentHostname, NodeType nodeType, + Node.State state, Optional<ApplicationId> application, String... additionalHostnames) { + Flavor flavor = nodeRepository.flavors().getFlavor(parentHostname.isPresent() ? "docker" : "host3").orElseThrow(); Optional<Allocation> allocation = application .map(app -> new Allocation( app, @@ -290,8 +467,9 @@ public class DynamicProvisioningMaintainerTest { flavor.resources(), Generation.initial(), false)); + List<Address> addresses = Stream.of(additionalHostnames).map(Address::new).collect(Collectors.toList()); Node.Builder builder = Node.create("fake-id-" + hostname, hostname, flavor, state, nodeType) - .ipConfigWithEmptyPool(state == Node.State.active ? Set.of("::1") : Set.of()); + .ipConfig(new IP.Config(state == Node.State.active ? Set.of("::1") : Set.of(), Set.of(), addresses)); parentHostname.ifPresent(builder::parentHostname); allocation.ifPresent(builder::allocation); return builder.build(); @@ -299,7 +477,7 @@ public class DynamicProvisioningMaintainerTest { private long provisionedHostsMatching(NodeResources resources) { return hostProvisioner.provisionedHosts.stream() - .filter(host -> host.nodeResources().equals(resources)) + .filter(host -> host.generateHost().resources().compatibleWith(resources)) .count(); } @@ -319,27 +497,35 @@ public class DynamicProvisioningMaintainerTest { private int deprovisionedHosts = 0; private EnumSet<Behaviour> behaviours = EnumSet.noneOf(Behaviour.class); + private Optional<Flavor> provisionHostFlavor = Optional.empty(); public MockHostProvisioner(NodeFlavors flavors, MockNameResolver nameResolver) { this.flavors = flavors; this.nameResolver = nameResolver; } + public MockHostProvisioner provisionSharedHost(String flavorName) { + provisionHostFlavor = Optional.of(flavors.getFlavorOrThrow(flavorName)); + return this; + } + @Override public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndexes, NodeResources resources, ApplicationId applicationId, Version osVersion, HostSharing sharing) { - Flavor hostFlavor = flavors.getFlavors().stream() - .filter(f -> !f.isDocker()) - .filter(f -> f.resources().compatibleWith(resources)) - .findFirst() - .orElseThrow(() -> new IllegalArgumentException("No host flavor found satisfying " + resources)); + Flavor hostFlavor = provisionHostFlavor + .orElseGet(() -> flavors.getFlavors().stream() + .filter(f -> !f.isDocker()) + .filter(f -> f.resources().compatibleWith(resources)) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException("No host flavor found satisfying " + resources))); + List<ProvisionedHost> hosts = new ArrayList<>(); for (int index : provisionIndexes) { hosts.add(new ProvisionedHost("host" + index, "hostname" + index, hostFlavor, Optional.empty(), - List.of(new Address("nodename" + index)), + createAddressesForHost(hostFlavor, index), resources, osVersion)); } @@ -347,6 +533,13 @@ public class DynamicProvisioningMaintainerTest { return hosts; } + private List<Address> createAddressesForHost(Flavor flavor, int hostIndex) { + long numAddresses = Math.max(1, Math.round(flavor.resources().bandwidthGbps())); + return IntStream.range(0, (int) numAddresses) + .mapToObj(i -> new Address("nodename" + hostIndex + "_" + i)) + .collect(Collectors.toList()); + } + @Override public List<Node> provision(Node host, Set<Node> children) throws FatalProvisioningException { if (behaviours.contains(Behaviour.failProvisioning)) throw new FatalProvisioningException("Failed to provision node(s)"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index a25858c034f..3e4887b6998 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -147,25 +147,14 @@ public class MetricsReporterTest { // Verify sum of values across dimensions, and remove these metrics to avoid checking against // metric.values below, which is not sensitive to dimensions. - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquire", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireFailed", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.locked", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.release", 3); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.releaseFailed", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.reentry", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.nakedRelease", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireWithoutRelease", 0); - verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.foreignRelease", 0); - metric.remove("lockAttempt.acquireLatency"); metric.remove("lockAttempt.acquireMaxActiveLatency"); metric.remove("lockAttempt.acquireHz"); metric.remove("lockAttempt.acquireLoad"); metric.remove("lockAttempt.lockedLatency"); - metric.remove("lockAttempt.lockedMaxActiveLatency"); - metric.remove("lockAttempt.lockedHz"); metric.remove("lockAttempt.lockedLoad"); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.acquireTimedOut", 0); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.deadlock", 0); + verifyAndRemoveIntegerMetricSum(metric, "lockAttempt.errors", 0); assertEquals(expectedMetrics, new TreeMap<>(metric.values)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index d403affc292..d4dbc6f55a5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Report; import com.yahoo.vespa.hosted.provision.node.Reports; import org.junit.Test; @@ -233,7 +234,7 @@ public class NodeFailerTest { assertEquals(2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size()); assertEquals(Node.State.failed, tester.nodeRepository.getNode(readyFail1.hostname()).get().state()); assertEquals(Node.State.failed, tester.nodeRepository.getNode(readyFail2.hostname()).get().state()); - + String downHost1 = tester.nodeRepository.getNodes(NodeFailTester.app1, Node.State.active).get(1).hostname(); String downHost2 = tester.nodeRepository.getNodes(NodeFailTester.app2, Node.State.active).get(3).hostname(); tester.serviceMonitor.setHostDown(downHost1); @@ -309,6 +310,36 @@ public class NodeFailerTest { } @Test + public void re_activate_grace_period_test() { + NodeFailTester tester = NodeFailTester.withTwoApplications(); + String downNode = tester.nodeRepository.getNodes(NodeFailTester.app1, Node.State.active).get(1).hostname(); + + tester.serviceMonitor.setHostDown(downNode); + tester.allNodesMakeAConfigRequestExcept(); + tester.runMaintainers(); + assertEquals(0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size()); + + tester.clock.advance(Duration.ofMinutes(75)); + tester.allNodesMakeAConfigRequestExcept(); + tester.runMaintainers(); + assertEquals(1, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size()); + assertEquals(Node.State.failed, tester.nodeRepository.getNode(downNode).get().state()); + + // Re-activate the node. It is still down, but should not be failed out until the grace period has passed again + tester.nodeRepository.reactivate(downNode, Agent.system, getClass().getSimpleName()); + tester.clock.advance(Duration.ofMinutes(30)); + tester.allNodesMakeAConfigRequestExcept(); + tester.runMaintainers(); + assertEquals(0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size()); + + tester.clock.advance(Duration.ofMinutes(45)); + tester.allNodesMakeAConfigRequestExcept(); + tester.runMaintainers(); + assertEquals(1, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size()); + assertEquals(Node.State.failed, tester.nodeRepository.getNode(downNode).get().state()); + } + + @Test public void node_failing_can_allocate_spare() { var resources = new NodeResources(1, 20, 15, 1); Capacity capacity = Capacity.from(new ClusterResources(3, 1, resources), false, true); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java index 722911569de..4f0b0d55742 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java @@ -19,6 +19,7 @@ import java.util.Set; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; /** @@ -51,9 +52,8 @@ public class NodeMetricsDbMaintainerTest { List<MetricSnapshot> allSnapshots = timeseriesList.stream() .flatMap(timeseries -> timeseries.asList().stream()) .collect(Collectors.toList()); - assertEquals("Snapshot from the node not in service is filtered out", - 1, allSnapshots.size()); - assertEquals(0.14, allSnapshots.get(0).cpu(), 0.000001); + assertTrue(allSnapshots.stream().anyMatch(snapshot -> snapshot.inService())); + assertTrue(allSnapshots.stream().anyMatch(snapshot -> ! snapshot.inService())); } private static class MockHttpClient implements MetricsV2MetricsFetcher.HttpClient { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index 36d088a59df..2abe5ed7ebf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -5,8 +5,8 @@ import com.yahoo.component.Version; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.curator.mock.MockCurator; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; +import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -142,7 +142,7 @@ public class NodeRebooterTest { } private static ProvisioningTester createTester(Duration rebootInterval, InMemoryFlagSource flagSource) { - flagSource = flagSource.withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays()); + flagSource = flagSource.withIntFlag(PermanentFlags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays()); ProvisioningTester tester = new ProvisioningTester.Builder().flagSource(flagSource).build(); tester.clock().setInstant(Instant.ofEpochMilli(1605522619000L)); // Use a fixed random seed ((MockCurator) tester.getCurator()).setZooKeeperEnsembleConnectionSpec("zk1.host:1,zk2.host:2,zk3.host:3"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index 55a183c8ec1..cd188bc017f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -51,7 +51,7 @@ public class PeriodicApplicationMaintainerTest { @After public void after() { - this.fixture.maintainer.close(); + this.fixture.maintainer.awaitShutdown(); } @Test(timeout = 60_000) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 15966a4c44b..d1fc13c9796 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -56,6 +56,7 @@ public class ScalingSuggestionsMaintainerTest { new ClusterResources(10, 1, new NodeResources(6.5, 5, 15, 0.1)), false, true)); + tester.clock().advance(Duration.ofHours(13)); addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository(), metricsDb); addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository(), metricsDb); @@ -81,6 +82,7 @@ public class ScalingSuggestionsMaintainerTest { memory, disk, generation, + true, true)))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index e63f31cf304..06473e60712 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -45,7 +45,8 @@ public class ApplicationSerializerTest { List.of(new ScalingEvent(new ClusterResources(10, 5, minResources), new ClusterResources(12, 6, minResources), 7L, - Instant.ofEpochMilli(12345L))), + Instant.ofEpochMilli(12345L), + Optional.of(Instant.ofEpochMilli(67890L)))), "Autoscaling status")); Application original = new Application(ApplicationId.from("myTenant", "myApplication", "myInstance"), clusters); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java index 8b9d60aeaf4..e5333ea4186 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java @@ -79,7 +79,7 @@ public class NodeSerializerTest { node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), - ClusterMembership.from("content/myId/0/0", Vtag.currentVersion, Optional.empty()), + ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), requestedResources, clock.instant()); assertEquals(1, node.history().events().size()); @@ -134,7 +134,7 @@ public class NodeSerializerTest { " \"applicationId\" : \"myApplication\",\n" + " \"tenantId\" : \"myTenant\",\n" + " \"instanceId\" : \"myInstance\",\n" + - " \"serviceId\" : \"content/myId/0/0\",\n" + + " \"serviceId\" : \"content/myId/0/0/stateful\",\n" + " \"restartGeneration\" : 3,\n" + " \"currentRestartGeneration\" : 4,\n" + " \"removable\" : true,\n" + @@ -167,7 +167,7 @@ public class NodeSerializerTest { node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), - ClusterMembership.from("content/myId/0/0", Vtag.currentVersion, Optional.empty()), + ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), node.flavor().resources(), clock.instant()); assertEquals(1, node.history().events().size()); @@ -217,7 +217,7 @@ public class NodeSerializerTest { node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), - ClusterMembership.from("content/myId/0/0", Vtag.currentVersion, Optional.empty()), + ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), node.flavor().resources(), clock.instant()); @@ -315,7 +315,7 @@ public class NodeSerializerTest { " \"hostname\" : \"myHostname\",\n" + " \"ipAddresses\" : [\"127.0.0.1\"],\n" + " \"instance\": {\n" + - " \"serviceId\": \"content/myId/0/0\",\n" + + " \"serviceId\": \"content/myId/0/0/stateful\",\n" + " \"wantedVespaVersion\": \"6.42.2\"\n" + " }\n" + "}"; @@ -408,7 +408,7 @@ public class NodeSerializerTest { node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), - ClusterMembership.from("content/myId/0/0", Vtag.currentVersion, Optional.empty()), + ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), node.flavor().resources(), clock.instant()); assertTrue(node.allocation().isPresent()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java index e046bc0a512..f368f4d139c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java @@ -75,7 +75,6 @@ public class DockerProvisioningTest { public void refuses_to_activate_on_non_active_host() { ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); - ApplicationId zoneApplication = ProvisioningTester.applicationId(); List<Node> parents = tester.makeReadyNodes(10, new NodeResources(2, 4, 20, 2), NodeType.host, 1); for (Node parent : parents) tester.makeReadyVirtualDockerNodes(1, dockerResources, parent.hostname()); @@ -90,11 +89,8 @@ public class DockerProvisioningTest { fail("Expected the allocation to fail due to parent hosts not being active yet"); } catch (OutOfCapacityException expected) { } - // Activate the zone-app, thereby allocating the parents - List<HostSpec> hosts = tester.prepare(zoneApplication, - ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("zone-app")).vespaVersion(wantedVespaVersion).build(), - Capacity.fromRequiredNodeType(NodeType.host)); - tester.activate(zoneApplication, hosts); + // Activate the hosts, thereby allocating the parents + tester.activateTenantHosts(); // Try allocating tenants again List<HostSpec> nodes = tester.prepare(application1, @@ -412,10 +408,6 @@ public class DockerProvisioningTest { return nodes.asList().stream().map(Node::parentHostname).map(Optional::get).collect(Collectors.toSet()); } - private void prepareAndActivate(ApplicationId application, int nodeCount, boolean exclusive, ProvisioningTester tester) { - prepareAndActivate(application, nodeCount, exclusive, dockerResources, tester); - } - private void prepareAndActivate(ApplicationId application, int nodeCount, boolean exclusive, NodeResources resources, ProvisioningTester tester) { Set<HostSpec> hosts = new HashSet<>(tester.prepare(application, ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("myContainer")).vespaVersion("6.39").exclusive(exclusive).build(), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java index c51ef7250e2..7a636a030ec 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java @@ -33,6 +33,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; /** * @author mpolden @@ -236,6 +237,31 @@ public class LoadBalancerProvisionerTest { assertEquals(cluster, lbs.get().get(0).id().cluster()); } + @Test + public void reject_load_balancers_with_clashing_names() { + ApplicationId instance1 = ApplicationId.from("t1", "a1", "default"); + ApplicationId instance2 = ApplicationId.from("t1", "a1", "dev"); + ApplicationId instance3 = ApplicationId.from("t1", "a1", "qrs"); + ClusterSpec.Id devCluster = ClusterSpec.Id.from("dev"); + ClusterSpec.Id defaultCluster = ClusterSpec.Id.from("default"); + + // instance1 is deployed + tester.activate(instance1, prepare(instance1, clusterRequest(ClusterSpec.Type.container, devCluster))); + + // instance2 clashes because cluster name matches instance1 + try { + prepare(instance2, clusterRequest(ClusterSpec.Type.container, defaultCluster)); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) { + } + + // instance2 changes cluster name and does not clash + tester.activate(instance2, prepare(instance2, clusterRequest(ClusterSpec.Type.container, ClusterSpec.Id.from("qrs")))); + + // instance3 clashes because instance name matches instance2 cluster + tester.activate(instance3, prepare(instance3, clusterRequest(ClusterSpec.Type.container, defaultCluster))); + } + private void dirtyNodesOf(ApplicationId application) { tester.nodeRepository().setDirty(tester.nodeRepository().getNodes(application), Agent.system, this.getClass().getSimpleName()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 86427fe30ae..3945c518a77 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -485,7 +485,7 @@ public class NodesV2ApiTest { "{\"message\":\"Moved host2.yahoo.com to parked\"}"); tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/ready/host2.yahoo.com", new byte[0], Request.Method.PUT), - 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot make parked host host2.yahoo.com allocated to tenant2.application2.instance2 as 'content/id2/0/0' available for new allocation as it is not in state [dirty]\"}"); + 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot make parked host host2.yahoo.com allocated to tenant2.application2.instance2 as 'content/id2/0/0/stateful' available for new allocation as it is not in state [dirty]\"}"); // (... while dirty then ready works (the ready move will be initiated by node maintenance)) assertResponse(new Request("http://localhost:8080/nodes/v2/state/dirty/host2.yahoo.com", new byte[0], Request.Method.PUT), @@ -502,7 +502,7 @@ public class NodesV2ApiTest { // Attempt to DELETE allocated node tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com", new byte[0], Request.Method.DELETE), - 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"active child node host4.yahoo.com allocated to tenant3.application3.instance3 as 'content/id3/0/0' is currently allocated and cannot be removed\"}"); + 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"active child node host4.yahoo.com allocated to tenant3.application3.instance3 as 'content/id3/0/0/stateful' is currently allocated and cannot be removed\"}"); // PUT current restart generation with string instead of long tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com", diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/capacity-zone.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/capacity-zone.json index f6b2f96023a..c3857e9c8ee 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/capacity-zone.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/capacity-zone.json @@ -4,7 +4,7 @@ "failedTenantParent": "dockerhost1.yahoo.com", "failedTenant": "host4.yahoo.com", "failedTenantResources": "[vcpu: 1.0, memory: 4.0 Gb, disk 100.0 Gb, bandwidth: 1.0 Gbps, storage type: local]", - "failedTenantAllocation": "allocated to tenant3.application3.instance3 as 'content/id3/0/0'", + "failedTenantAllocation": "allocated to tenant3.application3.instance3 as 'content/id3/0/0/stateful'", "hostCandidateRejectionReasons": { "singularReasonFailures": { "insufficientVcpu": 0, @@ -43,4 +43,4 @@ } ] } -}
\ No newline at end of file +} |