diff options
author | Harald Musum <musum@verizonmedia.com> | 2020-11-26 11:43:38 +0100 |
---|---|---|
committer | Harald Musum <musum@verizonmedia.com> | 2020-11-26 11:43:38 +0100 |
commit | 3838f731e99e2a24871b3a90eb40d18579a56e40 (patch) | |
tree | 7e07cd3a87782c3f1a025e25127c8cdf5e2039d7 /node-repository | |
parent | c2895954f3465b6cdda26c84ecb6a9d1fb8e4a0d (diff) | |
parent | 693277054fd2f122ae40aa011e848526fad8a64e (diff) |
Merge branch 'master' into revert-14062-revert-14057-hmusum/upgrade-to-curator-4
Diffstat (limited to 'node-repository')
33 files changed, 669 insertions, 286 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index d0ee6229428..00327dc0002 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -94,7 +94,7 @@ public final class Node implements Nodelike { requireNonEmpty(ipConfig.primary(), "Active node " + hostname + " must have at least one valid IP address"); if (parentHostname.isPresent()) { - if (!ipConfig.pool().isEmpty()) throw new IllegalArgumentException("A child node cannot have an IP address pool"); + if (!ipConfig.pool().getIpSet().isEmpty()) throw new IllegalArgumentException("A child node cannot have an IP address pool"); if (modelName.isPresent()) throw new IllegalArgumentException("A child node cannot have model name set"); if (switchHostname.isPresent()) throw new IllegalArgumentException("A child node cannot have switch hostname set"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 05bdfd25b76..86795767710 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -165,7 +165,7 @@ public class NodeRepository extends AbstractComponent { this.osVersions = new OsVersions(this); this.infrastructureVersions = new InfrastructureVersions(db); this.firmwareChecks = new FirmwareChecks(db, clock); - this.containerImages = new ContainerImages(db, containerImage, flagSource); + this.containerImages = new ContainerImages(db, containerImage); this.jobControl = new JobControl(new JobControlFlags(db, flagSource)); this.applications = new Applications(db); this.spareCount = spareCount; @@ -460,7 +460,7 @@ public class NodeRepository extends AbstractComponent { .map(node -> { if (node.state() != State.provisioned && node.state() != State.dirty) illegal("Can not set " + node + " ready. It is not provisioned or dirty."); - if (node.type() == NodeType.host && node.ipConfig().pool().isEmpty()) + if (node.type() == NodeType.host && node.ipConfig().pool().getIpSet().isEmpty()) illegal("Can not set host " + node + " ready. Its IP address pool is empty."); return node.withWantToRetire(false, false, Agent.system, clock.instant()); }) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java index fd92b5b0ca0..847b825a7a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java @@ -57,7 +57,7 @@ public class Application { public Application withCluster(ClusterSpec.Id id, boolean exclusive, ClusterResources min, ClusterResources max) { Cluster cluster = clusters.get(id); if (cluster == null) - cluster = new Cluster(id, exclusive, min, max, Optional.empty(), Optional.empty(), List.of()); + cluster = new Cluster(id, exclusive, min, max, Optional.empty(), Optional.empty(), List.of(), ""); else cluster = cluster.withConfiguration(exclusive, min, max); return with(cluster); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index a17ee081447..90133f7499e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -25,6 +25,7 @@ public class Cluster { private final Optional<ClusterResources> suggested; private final Optional<ClusterResources> target; private final List<ScalingEvent> scalingEvents; + private final String autoscalingStatus; public Cluster(ClusterSpec.Id id, boolean exclusive, @@ -32,7 +33,8 @@ public class Cluster { ClusterResources maxResources, Optional<ClusterResources> suggestedResources, Optional<ClusterResources> targetResources, - List<ScalingEvent> scalingEvents) { + List<ScalingEvent> scalingEvents, + String autoscalingStatus) { this.id = Objects.requireNonNull(id); this.exclusive = exclusive; this.min = Objects.requireNonNull(minResources); @@ -44,6 +46,7 @@ public class Cluster { else this.target = targetResources; this.scalingEvents = scalingEvents; + this.autoscalingStatus = autoscalingStatus; } public ClusterSpec.Id id() { return id; } @@ -73,21 +76,33 @@ public class Cluster { /** Returns the recent scaling events in this cluster */ public List<ScalingEvent> scalingEvents() { return scalingEvents; } + public Optional<ScalingEvent> lastScalingEvent() { + if (scalingEvents.isEmpty()) return Optional.empty(); + return Optional.of(scalingEvents.get(scalingEvents.size() - 1)); + } + + /** The latest autoscaling status of this cluster, or empty (never null) if none */ + public String autoscalingStatus() { return autoscalingStatus; } + public Cluster withConfiguration(boolean exclusive, ClusterResources min, ClusterResources max) { - return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } public Cluster withSuggested(Optional<ClusterResources> suggested) { - return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } public Cluster withTarget(Optional<ClusterResources> target) { - return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } public Cluster with(ScalingEvent scalingEvent) { // NOTE: We're just storing the latest scaling event so far - return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent)); + return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent), autoscalingStatus); + } + + public Cluster withAutoscalingStatus(String autoscalingStatus) { + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 1a8c4c8a6c2..d2c943794fe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -3,14 +3,16 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.time.Duration; +import java.time.Instant; import java.util.List; +import java.util.Objects; import java.util.Optional; -import java.util.logging.Logger; /** * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster @@ -20,8 +22,6 @@ import java.util.logging.Logger; */ public class Autoscaler { - private static final Logger log = Logger.getLogger(Autoscaler.class.getName()); - /** What cost difference factor is worth a reallocation? */ private static final double costDifferenceWorthReallocation = 0.1; /** What difference factor for a resource is worth a reallocation? */ @@ -55,39 +55,46 @@ public class Autoscaler { * @return scaling advice for this cluster */ public Advice autoscale(Cluster cluster, List<Node> clusterNodes) { - if (cluster.minResources().equals(cluster.maxResources())) return Advice.none(); // Shortcut + if (cluster.minResources().equals(cluster.maxResources())) return Advice.none("Autoscaling is disabled"); // Shortcut return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()); } private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean exclusive) { - log.fine(() -> "Autoscale " + cluster.toString()); - - if (unstable(clusterNodes, nodeRepository)) { - log.fine(() -> "Unstable - Advice.none " + cluster.toString()); - return Advice.none(); - } + ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); + if (unstable(clusterNodes, nodeRepository)) + return Advice.none("Cluster change in progress"); - AllocatableClusterResources currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository, cluster.exclusive()); + AllocatableClusterResources currentAllocation = + new AllocatableClusterResources(clusterNodes, nodeRepository, cluster.exclusive()); ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository); - Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu, cluster); - Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory, cluster); - Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk, cluster); - if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Advice.none(); + int measurementsPerNode = clusterTimeseries.measurementsPerNode(); + if (measurementsPerNode < minimumMeasurementsPerNode(clusterType)) + return Advice.none("Collecting more data before making new scaling decisions" + + ": Has " + measurementsPerNode + " data points per node"); - var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation); + int nodesMeasured = clusterTimeseries.nodesMeasured(); + if (nodesMeasured != clusterNodes.size()) + return Advice.none("Collecting more data before making new scaling decisions" + + ": Has measurements from " + nodesMeasured + " but need from " + clusterNodes.size()); + + double cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); + double memoryLoad = clusterTimeseries.averageLoad(Resource.memory); + double diskLoad = clusterTimeseries.averageLoad(Resource.disk); + + var target = ResourceTarget.idealLoad(cpuLoad, memoryLoad, diskLoad, currentAllocation); Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(target, currentAllocation, limits, exclusive); - if (bestAllocation.isEmpty()) { - log.fine(() -> "bestAllocation.isEmpty: Advice.dontScale for " + cluster.toString()); - return Advice.dontScale(); - } - if (similar(bestAllocation.get(), currentAllocation)) { - log.fine(() -> "Current allocation similar: Advice.dontScale for " + cluster.toString()); - return Advice.dontScale(); - } + if (bestAllocation.isEmpty()) + return Advice.dontScale("No allocation changes are possible within configured limits"); + + if (similar(bestAllocation.get(), currentAllocation)) + return Advice.dontScale("Cluster is ideally scaled (within configured limits)"); + if (isDownscaling(bestAllocation.get(), currentAllocation) && recentlyScaled(cluster, clusterNodes)) + return Advice.dontScale("Waiting a while before scaling down"); + return Advice.scaleTo(bestAllocation.get().toAdvertisedClusterResources()); } @@ -106,10 +113,23 @@ public class Autoscaler { return Math.abs(r1 - r2) / (( r1 + r2) / 2) < threshold; } + /** Returns true if this reduces total resources in any dimension */ + private boolean isDownscaling(AllocatableClusterResources target, AllocatableClusterResources current) { + NodeResources targetTotal = target.toAdvertisedClusterResources().totalResources(); + NodeResources currentTotal = current.toAdvertisedClusterResources().totalResources(); + return ! targetTotal.justNumbers().satisfies(currentTotal.justNumbers()); + } + + private boolean recentlyScaled(Cluster cluster, List<Node> clusterNodes) { + Duration downscalingDelay = downscalingDelay(clusterNodes.get(0).allocation().get().membership().cluster().type()); + return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) + .isAfter(nodeRepository.clock().instant().minus(downscalingDelay)); + } + /** The duration of the window we need to consider to make a scaling decision. See also minimumMeasurementsPerNode */ static Duration scalingWindow(ClusterSpec.Type clusterType) { if (clusterType.isContent()) return Duration.ofHours(12); - return Duration.ofHours(1); + return Duration.ofMinutes(30); } static Duration maxScalingWindow() { @@ -119,7 +139,16 @@ public class Autoscaler { /** Measurements are currently taken once a minute. See also scalingWindow */ static int minimumMeasurementsPerNode(ClusterSpec.Type clusterType) { if (clusterType.isContent()) return 60; - return 20; + return 7; + } + + /** + * We should wait a while before scaling down after a scaling event as a peak in usage + * indicates more peaks may arrive in the near future. + */ + static Duration downscalingDelay(ClusterSpec.Type clusterType) { + if (clusterType.isContent()) return Duration.ofHours(12); + return Duration.ofHours(1); } public static boolean unstable(List<Node> nodes, NodeRepository nodeRepository) { @@ -140,10 +169,12 @@ public class Autoscaler { private final boolean present; private final Optional<ClusterResources> target; + private final String reason; - private Advice(Optional<ClusterResources> target, boolean present) { + private Advice(Optional<ClusterResources> target, boolean present, String reason) { this.target = target; this.present = present; + this.reason = Objects.requireNonNull(reason); } /** @@ -158,10 +189,14 @@ public class Autoscaler { /** True if this provides advice (which may be to keep the current allocation) */ public boolean isPresent() { return present; } - private static Advice none() { return new Advice(Optional.empty(), false); } - private static Advice dontScale() { return new Advice(Optional.empty(), true); } - private static Advice scaleTo(ClusterResources target) { return new Advice(Optional.of(target), true); } + /** The reason for this advice */ + public String reason() { return reason; } + private static Advice none(String reason) { return new Advice(Optional.empty(), false, reason); } + private static Advice dontScale(String reason) { return new Advice(Optional.empty(), true, reason); } + private static Advice scaleTo(ClusterResources target) { + return new Advice(Optional.of(target), true, "Scaling due to load changes"); + } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index bb91b77dce5..e325e797ca5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -10,8 +10,6 @@ import java.time.Instant; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.logging.Logger; import java.util.stream.Collectors; /** @@ -21,10 +19,7 @@ import java.util.stream.Collectors; */ public class ClusterTimeseries { - private static final Logger log = Logger.getLogger(ClusterTimeseries.class.getName()); - private final List<Node> clusterNodes; - private final Map<String, Instant> startTimePerNode; /** The measurements for all hosts in this snapshot */ private final List<NodeTimeseries> nodeTimeseries; @@ -32,9 +27,10 @@ public class ClusterTimeseries { public ClusterTimeseries(Cluster cluster, List<Node> clusterNodes, MetricsDb db, NodeRepository nodeRepository) { this.clusterNodes = clusterNodes; ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - this.nodeTimeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), - clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet())); - this.startTimePerNode = metricStartTimes(cluster, clusterNodes, nodeRepository); + var allTimeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), + clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet())); + Map<String, Instant> startTimePerNode = metricStartTimes(cluster, clusterNodes, allTimeseries, nodeRepository); + nodeTimeseries = filterStale(allTimeseries, startTimePerNode); } /** @@ -43,6 +39,7 @@ public class ClusterTimeseries { */ private Map<String, Instant> metricStartTimes(Cluster cluster, List<Node> clusterNodes, + List<NodeTimeseries> nodeTimeseries, NodeRepository nodeRepository) { Map<String, Instant> startTimePerHost = new HashMap<>(); if ( ! cluster.scalingEvents().isEmpty()) { @@ -65,31 +62,22 @@ public class ClusterTimeseries { return startTimePerHost; } - /** - * Returns the average load of this resource in the measurement window, - * or empty if we do not have a reliable measurement across the cluster nodes. - */ - public Optional<Double> averageLoad(Resource resource, Cluster cluster) { - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - - List<NodeTimeseries> currentMeasurements = filterStale(nodeTimeseries, startTimePerNode); + /** Returns the average number of measurements per node */ + public int measurementsPerNode() { + int measurementCount = nodeTimeseries.stream().mapToInt(m -> m.size()).sum(); + return measurementCount / clusterNodes.size(); + } - // Require a total number of measurements scaling with the number of nodes, - // but don't require that we have at least that many from every node - int measurementCount = currentMeasurements.stream().mapToInt(m -> m.size()).sum(); - if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) { - log.fine(() -> "Too few measurements per node for " + cluster.toString() + ": measurementCount " + measurementCount + - " (" + nodeTimeseries.stream().mapToInt(m -> m.size()).sum() + " before filtering"); - return Optional.empty(); - } - if (currentMeasurements.size() != clusterNodes.size()) { - log.fine(() -> "Mssing measurements from some nodes for " + cluster.toString() + ": Has from " + currentMeasurements.size() + - "but need " + clusterNodes.size() + "(before filtering: " + nodeTimeseries.size() + ")"); - return Optional.empty(); - } + /** Returns the number of nodes measured in this */ + public int nodesMeasured() { + return nodeTimeseries.size(); + } - double measurementSum = currentMeasurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); - return Optional.of(measurementSum / measurementCount); + /** Returns the average load of this resource in this */ + public double averageLoad(Resource resource) { + int measurementCount = nodeTimeseries.stream().mapToInt(m -> m.size()).sum(); + double measurementSum = nodeTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + return measurementSum / measurementCount; } private double value(Resource resource, MetricSnapshot snapshot) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index b53f56e4743..809c54146d0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -17,7 +17,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import java.time.Duration; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; @@ -70,13 +69,13 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return; - log.fine(() -> "Autoscale " + application.toString()); - var advice = autoscaler.autoscale(cluster.get(), clusterNodes); - if (advice.isEmpty()) return; - - if ( ! cluster.get().targetResources().equals(advice.target())) { + application = application.with(cluster.get().withAutoscalingStatus(advice.reason())); + if (advice.isEmpty()) { + applications().put(application, deployment.applicationLock().get()); + } + else if ( ! cluster.get().targetResources().equals(advice.target())) { applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get()); if (advice.target().isPresent()) { logAutoscaling(advice.target().get(), applicationId, cluster.get(), clusterNodes); @@ -100,11 +99,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { } static String toString(ClusterResources r) { - return String.format(Locale.US, "%d%s * [vcpu: %.1f, memory: %.1f Gb, disk %.1f Gb]" + - " (total: [vcpu: %.1f, memory: %.1f Gb, disk: %.1f Gb])", - r.nodes(), r.groups() > 1 ? " (in " + r.groups() + " groups)" : "", - r.nodeResources().vcpu(), r.nodeResources().memoryGb(), r.nodeResources().diskGb(), - r.nodes() * r.nodeResources().vcpu(), r.nodes() * r.nodeResources().memoryGb(), r.nodes() * r.nodeResources().diskGb()); + return r + " (total: " + r.totalResources() + ")"; } private Map<ClusterSpec.Id, List<Node>> nodesByCluster(List<Node> applicationNodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 3bf287a3e80..d2dcaaeae5b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; @@ -13,6 +14,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.curator.stats.LatencyMetrics; import com.yahoo.vespa.curator.stats.LockStats; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -26,12 +28,13 @@ import java.time.Duration; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; +import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; import static com.yahoo.config.provision.NodeResources.DiskSpeed.any; -import static com.yahoo.vespa.hosted.provision.Node.State.active; /** * @author oyving @@ -70,9 +73,39 @@ public class MetricsReporter extends NodeRepositoryMaintainer { updateLockMetrics(); updateDockerMetrics(nodes); updateTenantUsageMetrics(nodes); + updateRepairTicketMetrics(nodes); + updateAllocationMetrics(nodes); return true; } + private void updateAllocationMetrics(NodeList nodes) { + Map<ClusterKey, List<Node>> byCluster = nodes.stream() + .filter(node -> node.allocation().isPresent()) + .collect(Collectors.groupingBy(node -> new ClusterKey(node.allocation().get().owner(), node.allocation().get().membership().cluster().id()))); + byCluster.forEach((clusterKey, allocatedNodes) -> { + int activeNodes = 0; + int nonActiveNodes = 0; + for (var node : allocatedNodes) { + if (node.state() == State.active) { + activeNodes++; + } else { + nonActiveNodes++; + } + } + double nonActiveFraction; + if (activeNodes == 0) { // Cluster has been removed + nonActiveFraction = 1; + } else { + nonActiveFraction = (double) nonActiveNodes / (double) activeNodes; + } + Map<String, String> dimensions = new HashMap<>(dimensions(clusterKey.application)); + dimensions.put("clusterId", clusterKey.cluster.value()); + metric.set("nodes.active", activeNodes, getContext(dimensions)); + metric.set("nodes.nonActive", nonActiveNodes, getContext(dimensions)); + metric.set("nodes.nonActiveFraction", nonActiveFraction, getContext(dimensions)); + }); + } + private void updateZoneMetrics() { metric.set("zone.working", nodeRepository().isWorking() ? 1 : 0, null); } @@ -99,14 +132,12 @@ public class MetricsReporter extends NodeRepositoryMaintainer { Optional<Allocation> allocation = node.allocation(); if (allocation.isPresent()) { ApplicationId applicationId = allocation.get().owner(); - context = getContextAt( - "state", node.state().name(), - "host", node.hostname(), - "tenantName", applicationId.tenant().value(), - "applicationId", applicationId.serializedForm().replace(':', '.'), - "app", toApp(applicationId), - "clustertype", allocation.get().membership().cluster().type().name(), - "clusterid", allocation.get().membership().cluster().id().value()); + Map<String, String> dimensions = new HashMap<>(dimensions(applicationId)); + dimensions.put("state", node.state().name()); + dimensions.put("host", node.hostname()); + dimensions.put("clustertype", allocation.get().membership().cluster().type().name()); + dimensions.put("clusterid", allocation.get().membership().cluster().id().value()); + context = getContext(dimensions); long wantedRestartGeneration = allocation.get().restartGeneration().wanted(); metric.set("wantedRestartGeneration", wantedRestartGeneration, context); @@ -126,9 +157,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer { currentVersion.get().equals(wantedVersion); metric.set("wantToChangeVespaVersion", converged ? 0 : 1, context); } else { - context = getContextAt( - "state", node.state().name(), - "host", node.hostname()); + context = getContext(Map.of("state", node.state().name(), + "host", node.hostname())); } Optional<Version> currentVersion = node.status().vespaVersion(); @@ -211,24 +241,16 @@ public class MetricsReporter extends NodeRepositoryMaintainer { return version.getMinor() + version.getMicro() / 1000.0; } - private Metric.Context getContextAt(String... point) { - if (point.length % 2 != 0) - throw new IllegalArgumentException("Dimension specification comes in pairs"); - - Map<String, String> dimensions = new HashMap<>(); - for (int i = 0; i < point.length; i += 2) { - dimensions.put(point[i], point[i + 1]); - } - + private Metric.Context getContext(Map<String, String> dimensions) { return contextMap.computeIfAbsent(dimensions, metric::createContext); } private void updateNodeCountMetrics(NodeList nodes) { - Map<Node.State, List<Node>> nodesByState = nodes.nodeType(NodeType.tenant).asList().stream() - .collect(Collectors.groupingBy(Node::state)); + Map<State, List<Node>> nodesByState = nodes.nodeType(NodeType.tenant).asList().stream() + .collect(Collectors.groupingBy(Node::state)); // Count per state - for (Node.State state : Node.State.values()) { + for (State state : State.values()) { List<Node> nodesInState = nodesByState.getOrDefault(state, List.of()); metric.set("hostedVespa." + state.name() + "Hosts", nodesInState.size(), null); } @@ -237,7 +259,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { private void updateLockMetrics() { LockStats.getGlobal().getLockMetricsByPath() .forEach((lockPath, lockMetrics) -> { - Metric.Context context = getContextAt("lockPath", lockPath); + Metric.Context context = getContext(Map.of("lockPath", lockPath)); metric.set("lockAttempt.acquire", lockMetrics.getAndResetAcquireCount(), context); metric.set("lockAttempt.acquireFailed", lockMetrics.getAndResetAcquireFailedCount(), context); @@ -285,10 +307,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { .map(node -> node.allocation().get().requestedResources().justNumbers()) .reduce(new NodeResources(0, 0, 0, 0, any), NodeResources::add); - var context = getContextAt( - "tenantName", applicationId.tenant().value(), - "applicationId", applicationId.serializedForm().replace(':', '.'), - "app", toApp(applicationId)); + var context = getContext(dimensions(applicationId)); metric.set("hostedVespa.docker.allocatedCapacityCpu", allocatedCapacity.vcpu(), context); metric.set("hostedVespa.docker.allocatedCapacityMem", allocatedCapacity.memoryGb(), context); @@ -297,24 +316,65 @@ public class MetricsReporter extends NodeRepositoryMaintainer { ); } + private void updateRepairTicketMetrics(NodeList nodes) { + nodes.nodeType(NodeType.host).stream() + .map(node -> node.reports().getReport("repairTicket")) + .flatMap(Optional::stream) + .map(report -> report.getInspector().field("status").asString()) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())) + .forEach((status, number) -> metric.set("hostedVespa.breakfixedHosts", number, getContext(Map.of("status", status)))); + } + + private static Map<String, String> dimensions(ApplicationId application) { + return Map.of("tenantName", application.tenant().value(), + "applicationId", application.serializedForm().replace(':', '.'), + "app", toApp(application)); + } + private static NodeResources getCapacityTotal(NodeList nodes) { - return nodes.hosts().state(active).asList().stream() - .map(host -> host.flavor().resources()) - .map(NodeResources::justNumbers) - .reduce(new NodeResources(0, 0, 0, 0, any), NodeResources::add); + return nodes.hosts().state(State.active).asList().stream() + .map(host -> host.flavor().resources()) + .map(NodeResources::justNumbers) + .reduce(new NodeResources(0, 0, 0, 0, any), NodeResources::add); } private static NodeResources getFreeCapacityTotal(NodeList nodes) { - return nodes.hosts().state(active).asList().stream() - .map(n -> freeCapacityOf(nodes, n)) - .map(NodeResources::justNumbers) - .reduce(new NodeResources(0, 0, 0, 0, any), NodeResources::add); + return nodes.hosts().state(State.active).asList().stream() + .map(n -> freeCapacityOf(nodes, n)) + .map(NodeResources::justNumbers) + .reduce(new NodeResources(0, 0, 0, 0, any), NodeResources::add); } private static NodeResources freeCapacityOf(NodeList nodes, Node dockerHost) { return nodes.childrenOf(dockerHost).asList().stream() - .map(node -> node.flavor().resources().justNumbers()) - .reduce(dockerHost.flavor().resources().justNumbers(), NodeResources::subtract); + .map(node -> node.flavor().resources().justNumbers()) + .reduce(dockerHost.flavor().resources().justNumbers(), NodeResources::subtract); + } + + private static class ClusterKey { + + private final ApplicationId application; + private final ClusterSpec.Id cluster; + + public ClusterKey(ApplicationId application, ClusterSpec.Id cluster) { + this.application = application; + this.cluster = cluster; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClusterKey that = (ClusterKey) o; + return application.equals(that.application) && + cluster.equals(that.cluster); + } + + @Override + public int hashCode() { + return Objects.hash(application, cluster); + } + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java index 41d6c1e5425..bac31c40418 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java @@ -20,6 +20,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import static com.yahoo.config.provision.NodeType.confighost; import static com.yahoo.config.provision.NodeType.controllerhost; @@ -254,18 +255,25 @@ public class IP { * @return an allocation from the pool, if any can be made */ public Optional<Allocation> findAllocation(LockedNodeList nodes, NameResolver resolver) { + if (ipAddresses.asSet().isEmpty()) { + // IP addresses have not yet been resolved and should be done later. + return findUnusedAddressStream(nodes) + .map(Allocation::ofAddress) + .findFirst(); + } + if (ipAddresses.protocol == IpAddresses.Protocol.ipv4) { - return findUnused(nodes).stream() + return findUnusedIpAddresses(nodes).stream() .findFirst() .map(addr -> Allocation.ofIpv4(addr, resolver)); } - var unusedAddresses = findUnused(nodes); + var unusedAddresses = findUnusedIpAddresses(nodes); var allocation = unusedAddresses.stream() .filter(IP::isV6) .findFirst() .map(addr -> Allocation.ofIpv6(addr, resolver)); - allocation.flatMap(Allocation::secondary).ifPresent(ipv4Address -> { + allocation.flatMap(Allocation::ipv4Address).ifPresent(ipv4Address -> { if (!unusedAddresses.contains(ipv4Address)) { throw new IllegalArgumentException("Allocation resolved " + ipv4Address + " from hostname " + allocation.get().hostname + @@ -276,17 +284,43 @@ public class IP { } /** - * Finds all unused addresses in this pool + * Finds all unused IP addresses in this pool * * @param nodes a list of all nodes in the repository */ - public Set<String> findUnused(NodeList nodes) { + public Set<String> findUnusedIpAddresses(NodeList nodes) { var unusedAddresses = new LinkedHashSet<>(getIpSet()); nodes.matching(node -> node.ipConfig().primary().stream().anyMatch(ip -> getIpSet().contains(ip))) .forEach(node -> unusedAddresses.removeAll(node.ipConfig().primary())); return Collections.unmodifiableSet(unusedAddresses); } + /** + * Returns the number of unused IP addresses in the pool, assuming any and all unaccounted for hostnames + * in the pool are resolved to exactly 1 IP address (or 2 with {@link IpAddresses.Protocol#dualStack}). + */ + public int eventuallyUnusedAddressCount(NodeList nodes) { + // The address pool is filled immediately upon provisioning in dynamically provisioned zones, + // and within short time the IP address pool is filled. For all other cases, the IP address + // pool is already filled. + // + // The count in this method relies on the size of the IP address pool if that's non-empty, + // otherwise fall back to the address/hostname pool. + + + Set<String> currentIpAddresses = this.ipAddresses.asSet(); + if (!currentIpAddresses.isEmpty()) { + return findUnusedIpAddresses(nodes).size(); + } + + return (int) findUnusedAddressStream(nodes).count(); + } + + private Stream<Address> findUnusedAddressStream(NodeList nodes) { + Set<String> hostnames = nodes.stream().map(Node::hostname).collect(Collectors.toSet()); + return addresses.stream().filter(address -> !hostnames.contains(address.hostname())); + } + public IpAddresses.Protocol getProtocol() { return ipAddresses.protocol; } @@ -299,10 +333,6 @@ public class IP { return addresses; } - public boolean isEmpty() { - return getIpSet().isEmpty(); - } - public Pool withIpAddresses(Set<String> ipAddresses) { return Pool.of(ipAddresses, addresses); } @@ -326,22 +356,17 @@ public class IP { } - /** An IP address allocation from a pool */ + /** An address allocation from a pool */ public static class Allocation { private final String hostname; - private final String primary; - private final Optional<String> secondary; - - private Allocation(String hostname, String primary, Optional<String> secondary) { - Objects.requireNonNull(primary, "primary must be non-null"); - Objects.requireNonNull(secondary, "ipv4Address must be non-null"); - if (secondary.isPresent() && !isV4(secondary.get())) { // Secondary must be IPv4, if present - throw new IllegalArgumentException("Invalid IPv4 address '" + secondary + "'"); - } + private final Optional<String> ipv4Address; + private final Optional<String> ipv6Address; + + private Allocation(String hostname, Optional<String> ipv4Address, Optional<String> ipv6Address) { this.hostname = Objects.requireNonNull(hostname, "hostname must be non-null"); - this.primary = primary; - this.secondary = secondary; + this.ipv4Address = Objects.requireNonNull(ipv4Address, "ipv4Address must be non-null"); + this.ipv6Address = Objects.requireNonNull(ipv6Address, "ipv6Address must be non-null"); } /** @@ -350,13 +375,17 @@ public class IP { * A successful allocation is guaranteed to have an IPv6 address, but may also have an IPv4 address if the * hostname of the IPv6 address has an A record. * - * @param ipAddress Unassigned IPv6 address + * @param ipv6Address Unassigned IPv6 address * @param resolver DNS name resolver to use * @throws IllegalArgumentException if DNS is misconfigured * @return An allocation containing 1 IPv6 address and 1 IPv4 address (if hostname is dual-stack) */ - private static Allocation ofIpv6(String ipAddress, NameResolver resolver) { - String hostname6 = resolver.resolveHostname(ipAddress).orElseThrow(() -> new IllegalArgumentException("Could not resolve IP address: " + ipAddress)); + private static Allocation ofIpv6(String ipv6Address, NameResolver resolver) { + if (!isV6(ipv6Address)) { + throw new IllegalArgumentException("Invalid IPv6 address '" + ipv6Address + "'"); + } + + String hostname6 = resolver.resolveHostname(ipv6Address).orElseThrow(() -> new IllegalArgumentException("Could not resolve IP address: " + ipv6Address)); List<String> ipv4Addresses = resolver.resolveAll(hostname6).stream() .filter(IP::isV4) .collect(Collectors.toList()); @@ -369,10 +398,10 @@ public class IP { if (!hostname6.equals(hostname4)) { throw new IllegalArgumentException(String.format("Hostnames resolved from each IP address do not " + "point to the same hostname [%s -> %s, %s -> %s]", - ipAddress, hostname6, addr, hostname4)); + ipv6Address, hostname6, addr, hostname4)); } }); - return new Allocation(hostname6, ipAddress, ipv4Address); + return new Allocation(hostname6, ipv4Address, Optional.of(ipv6Address)); } /** @@ -391,7 +420,11 @@ public class IP { throw new IllegalArgumentException("Hostname " + hostname4 + " did not resolve to exactly 1 address. " + "Resolved: " + addresses); } - return new Allocation(hostname4, addresses.get(0), Optional.empty()); + return new Allocation(hostname4, Optional.of(addresses.get(0)), Optional.empty()); + } + + private static Allocation ofAddress(Address address) { + return new Allocation(address.hostname(), Optional.empty(), Optional.empty()); } /** Hostname pointing to the IP addresses in this */ @@ -399,27 +432,28 @@ public class IP { return hostname; } - /** Primary address of this allocation */ - public String primary() { - return primary; + /** IPv4 address of this allocation */ + public Optional<String> ipv4Address() { + return ipv4Address; } - /** Secondary address of this allocation */ - public Optional<String> secondary() { - return secondary; + /** IPv6 address of this allocation */ + public Optional<String> ipv6Address() { + return ipv6Address; } /** All IP addresses in this */ public Set<String> addresses() { ImmutableSet.Builder<String> builder = ImmutableSet.builder(); - secondary.ifPresent(builder::add); - builder.add(primary); + ipv4Address.ifPresent(builder::add); + ipv6Address.ifPresent(builder::add); return builder.build(); } @Override public String toString() { - return String.format("IP allocation [primary=%s, secondary=%s]", primary, secondary.orElse("<none>")); + return String.format("Address allocation [hostname=%s, IPv4=%s, IPv6=%s]", + hostname, ipv4Address.orElse("<none>"), ipv6Address.orElse("<none>")); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 2ddbd6def6f..3979b898145 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -47,6 +47,7 @@ public class ApplicationSerializer { private static final String groupsKey = "groups"; private static final String nodeResourcesKey = "resources"; private static final String scalingEventsKey = "scalingEvents"; + private static final String autoscalingStatusKey = "autoscalingStatus"; private static final String fromKey = "from"; private static final String toKey = "to"; private static final String generationKey = "generation"; @@ -95,6 +96,7 @@ public class ApplicationSerializer { cluster.suggestedResources().ifPresent(suggested -> toSlime(suggested, clusterObject.setObject(suggestedResourcesKey))); cluster.targetResources().ifPresent(target -> toSlime(target, clusterObject.setObject(targetResourcesKey))); scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray(scalingEventsKey)); + clusterObject.setString(autoscalingStatusKey, cluster.autoscalingStatus()); } private static Cluster clusterFromSlime(String id, Inspector clusterObject) { @@ -104,7 +106,8 @@ public class ApplicationSerializer { clusterResourcesFromSlime(clusterObject.field(maxResourcesKey)), optionalClusterResourcesFromSlime(clusterObject.field(suggestedResourcesKey)), optionalClusterResourcesFromSlime(clusterObject.field(targetResourcesKey)), - scalingEventsFromSlime(clusterObject.field(scalingEventsKey))); + scalingEventsFromSlime(clusterObject.field(scalingEventsKey)), + clusterObject.field(autoscalingStatusKey).asString()); } private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java index 45156c57481..b4cb9158a5c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java @@ -6,9 +6,6 @@ import com.google.common.base.Suppliers; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.curator.Lock; -import com.yahoo.vespa.flags.BooleanFlag; -import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; import java.time.Duration; @@ -31,7 +28,6 @@ public class ContainerImages { private final CuratorDatabaseClient db; private final DockerImage defaultImage; - private final BooleanFlag replaceImage; /** * The container image is read on every request to /nodes/v2/node/[fqdn]. Cache current images to avoid @@ -40,10 +36,9 @@ public class ContainerImages { */ private volatile Supplier<Map<NodeType, DockerImage>> images; - public ContainerImages(CuratorDatabaseClient db, DockerImage defaultImage, FlagSource flagSource) { + public ContainerImages(CuratorDatabaseClient db, DockerImage defaultImage) { this.db = db; this.defaultImage = defaultImage; - this.replaceImage = Flags.REGIONAL_CONTAINER_REGISTRY.bindTo(flagSource); createCache(); } @@ -85,7 +80,7 @@ public class ContainerImages { /** Rewrite the registry part of given image, using this zone's default image */ private DockerImage rewriteRegistry(DockerImage image) { DockerImage zoneImage = defaultImage; - if (zoneImage.replacedBy().isPresent() && replaceImage.value()) { + if (zoneImage.replacedBy().isPresent()) { zoneImage = zoneImage.replacedBy().get(); } return image.withRegistry(zoneImage.registry()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index b0baae650e4..6462fb6f19d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -71,47 +71,47 @@ public class GroupPreparer { } // There were some changes, so re-do the allocation with locks - try (Mutex lock = nodeRepository.lock(application)) { - try (Mutex allocationLock = nodeRepository.lockUnallocated()) { - NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - highestIndex, wantedGroups, allocationLock); - - if (nodeRepository.zone().getCloud().dynamicProvisioning()) { - Version osVersion = nodeRepository.osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); - List<ProvisionedHost> provisionedHosts = allocation.getFulfilledDockerDeficit() - .map(deficit -> hostProvisioner.get().provisionHosts(nodeRepository.database().getProvisionIndexes(deficit.getCount()), - deficit.getFlavor(), - application, - osVersion, - requestedNodes.isExclusive() ? HostSharing.exclusive : HostSharing.any)) - .orElseGet(List::of); - - // At this point we have started provisioning of the hosts, the first priority is to make sure that - // the returned hosts are added to the node-repo so that they are tracked by the provision maintainers - List<Node> hosts = provisionedHosts.stream() - .map(ProvisionedHost::generateHost) - .collect(Collectors.toList()); - nodeRepository.addNodes(hosts, Agent.application); - - // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit - List<NodeCandidate> candidates = provisionedHosts.stream() - .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), - host.generateHost())) - .collect(Collectors.toList()); - allocation.offer(candidates); - } - - if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new OutOfCapacityException((cluster.group().isPresent() ? "Out of capacity on " + cluster.group().get() :"") + - allocation.outOfCapacityDetails()); - - // Carry out and return allocation - nodeRepository.reserve(allocation.reservableNodes()); - nodeRepository.addDockerNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); - List<Node> acceptedNodes = allocation.finalNodes(); - surplusActiveNodes.removeAll(acceptedNodes); - return acceptedNodes; + try (Mutex lock = nodeRepository.lock(application); + Mutex allocationLock = nodeRepository.lockUnallocated()) { + + NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, + highestIndex, wantedGroups, allocationLock); + + if (nodeRepository.zone().getCloud().dynamicProvisioning()) { + Version osVersion = nodeRepository.osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); + List<ProvisionedHost> provisionedHosts = allocation.getFulfilledDockerDeficit() + .map(deficit -> hostProvisioner.get().provisionHosts(nodeRepository.database().getProvisionIndexes(deficit.getCount()), + deficit.getFlavor(), + application, + osVersion, + requestedNodes.isExclusive() ? HostSharing.exclusive : HostSharing.any)) + .orElseGet(List::of); + + // At this point we have started provisioning of the hosts, the first priority is to make sure that + // the returned hosts are added to the node-repo so that they are tracked by the provision maintainers + List<Node> hosts = provisionedHosts.stream() + .map(ProvisionedHost::generateHost) + .collect(Collectors.toList()); + nodeRepository.addNodes(hosts, Agent.application); + + // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit + List<NodeCandidate> candidates = provisionedHosts.stream() + .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), + host.generateHost())) + .collect(Collectors.toList()); + allocation.offer(candidates); } + + if (! allocation.fulfilled() && requestedNodes.canFail()) + throw new OutOfCapacityException((cluster.group().isPresent() ? "Out of capacity on " + cluster.group().get() :"") + + allocation.outOfCapacityDetails()); + + // Carry out and return allocation + nodeRepository.reserve(allocation.reservableNodes()); + nodeRepository.addDockerNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); + List<Node> acceptedNodes = allocation.finalNodes(); + surplusActiveNodes.removeAll(acceptedNodes); + return acceptedNodes; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java index 96053fdaa91..af3bde02421 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java @@ -82,7 +82,11 @@ public class HostCapacity { * Number of free (not allocated) IP addresses assigned to the dockerhost. */ int freeIPs(Node dockerHost) { - return dockerHost.ipConfig().pool().findUnused(allNodes).size(); + if (dockerHost.type() == NodeType.host) { + return dockerHost.ipConfig().pool().eventuallyUnusedAddressCount(allNodes); + } else { + return dockerHost.ipConfig().pool().findUnusedIpAddresses(allNodes).size(); + } } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index f8231072a28..14937e6afeb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -363,11 +363,11 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { try { allocation = parent.get().ipConfig().pool().findAllocation(allNodes, nodeRepository.nameResolver()); if (allocation.isEmpty()) return new InvalidNodeCandidate(resources, freeParentCapacity, parent.get(), - "No IP addresses available on parent host"); + "No addresses available on parent host"); } catch (Exception e) { - log.warning("Failed allocating IP address on " + parent.get() +": " + Exceptions.toMessageString(e)); + log.warning("Failed allocating address on " + parent.get() +": " + Exceptions.toMessageString(e)); return new InvalidNodeCandidate(resources, freeParentCapacity, parent.get(), - "Failed when allocating IP address on host"); + "Failed when allocating address on host"); } Node node = Node.createDockerNode(allocation.get().addresses(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionedHost.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionedHost.java index 61cedbb9373..02621c79019 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionedHost.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionedHost.java @@ -7,10 +7,12 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.OsVersion; import com.yahoo.vespa.hosted.provision.node.Status; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -26,25 +28,33 @@ public class ProvisionedHost { private final String hostHostname; private final Flavor hostFlavor; private final Optional<ApplicationId> exclusiveTo; - private final String nodeHostname; + private final List<Address> nodeAddresses; private final NodeResources nodeResources; private final Version osVersion; public ProvisionedHost(String id, String hostHostname, Flavor hostFlavor, Optional<ApplicationId> exclusiveTo, - String nodeHostname, NodeResources nodeResources, Version osVersion) { + List<Address> nodeAddresses, NodeResources nodeResources, Version osVersion) { this.id = Objects.requireNonNull(id, "Host id must be set"); this.hostHostname = Objects.requireNonNull(hostHostname, "Host hostname must be set"); this.hostFlavor = Objects.requireNonNull(hostFlavor, "Host flavor must be set"); this.exclusiveTo = Objects.requireNonNull(exclusiveTo, "exclusiveTo must be set"); - this.nodeHostname = Objects.requireNonNull(nodeHostname, "Node hostname must be set"); + this.nodeAddresses = validateNodeAddresses(nodeAddresses); this.nodeResources = Objects.requireNonNull(nodeResources, "Node resources must be set"); this.osVersion = Objects.requireNonNull(osVersion, "OS version must be set"); } + private static List<Address> validateNodeAddresses(List<Address> nodeAddresses) { + Objects.requireNonNull(nodeAddresses, "Node addresses must be set"); + if (nodeAddresses.isEmpty()) { + throw new IllegalArgumentException("There must be at least one node address"); + } + return nodeAddresses; + } + /** Generate {@link Node} instance representing the provisioned physical host */ public Node generateHost() { Node.Builder builder = Node - .create(id, IP.Config.EMPTY, hostHostname, hostFlavor, NodeType.host) + .create(id, IP.Config.of(Set.of(), Set.of(), nodeAddresses), hostHostname, hostFlavor, NodeType.host) .status(Status.initial().withOsVersion(OsVersion.EMPTY.withCurrent(Optional.of(osVersion)))); exclusiveTo.ifPresent(builder::exclusiveTo); return builder.build(); @@ -52,7 +62,7 @@ public class ProvisionedHost { /** Generate {@link Node} instance representing the node running on this physical host */ public Node generateNode() { - return Node.createDockerNode(Set.of(), nodeHostname, hostHostname, nodeResources, NodeType.tenant).build(); + return Node.createDockerNode(Set.of(), nodeHostname(), hostHostname, nodeResources, NodeType.tenant).build(); } public String getId() { @@ -68,7 +78,11 @@ public class ProvisionedHost { } public String nodeHostname() { - return nodeHostname; + return nodeAddresses.get(0).hostname(); + } + + public List<Address> nodeAddresses() { + return nodeAddresses; } public NodeResources nodeResources() { return nodeResources; } @@ -81,14 +95,14 @@ public class ProvisionedHost { return id.equals(that.id) && hostHostname.equals(that.hostHostname) && hostFlavor.equals(that.hostFlavor) && - nodeHostname.equals(that.nodeHostname) && + nodeAddresses.equals(that.nodeAddresses) && nodeResources.equals(that.nodeResources) && osVersion.equals(that.osVersion); } @Override public int hashCode() { - return Objects.hash(id, hostHostname, hostFlavor, nodeHostname, nodeResources, osVersion); + return Objects.hash(id, hostHostname, hostFlavor, nodeAddresses, nodeResources, osVersion); } @Override @@ -97,7 +111,7 @@ public class ProvisionedHost { "id='" + id + '\'' + ", hostHostname='" + hostHostname + '\'' + ", hostFlavor=" + hostFlavor + - ", nodeHostname='" + nodeHostname + '\'' + + ", nodeAddresses='" + nodeAddresses + '\'' + ", nodeResources=" + nodeResources + ", osVersion=" + osVersion + '}'; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 9433b89ddc4..91b54fa37e9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import java.net.URI; @@ -51,6 +52,8 @@ public class ApplicationSerializer { toSlime(currentResources, clusterObject.setObject("current")); cluster.suggestedResources().ifPresent(suggested -> toSlime(suggested, clusterObject.setObject("suggested"))); cluster.targetResources().ifPresent(target -> toSlime(target, clusterObject.setObject("target"))); + scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray("scalingEvents")); + clusterObject.setString("autoscalingStatus", cluster.autoscalingStatus()); } private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) { @@ -59,4 +62,13 @@ public class ApplicationSerializer { NodeResourcesSerializer.toSlime(resources.nodeResources(), clusterResourcesObject.setObject("resources")); } + private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor scalingEventsArray) { + for (ScalingEvent scalingEvent : scalingEvents) { + Cursor scalingEventObject = scalingEventsArray.addObject(); + toSlime(scalingEvent.from(), scalingEventObject.setObject("from")); + toSlime(scalingEvent.to(), scalingEventObject.setObject("to")); + scalingEventObject.setLong("at", scalingEvent.at().toEpochMilli()); + } + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index 304cebb3c01..c43629aeb09 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -29,6 +29,7 @@ import com.yahoo.vespa.hosted.provision.NoSuchNodeException; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter; @@ -256,8 +257,12 @@ public class NodesV2ApiHandler extends LoggingRequestHandler { Set<String> ipAddressPool = new HashSet<>(); inspector.field("additionalIpAddresses").traverse((ArrayTraverser) (i, item) -> ipAddressPool.add(item.asString())); + List<Address> addressPool = new ArrayList<>(); + inspector.field("additionalHostnames").traverse((ArrayTraverser) (i, item) -> + addressPool.add(new Address(item.asString()))); + Node.Builder builder = Node.create(inspector.field("openStackId").asString(), - IP.Config.of(ipAddresses, ipAddressPool, List.of()), + IP.Config.of(ipAddresses, ipAddressPool, addressPool), inspector.field("hostname").asString(), flavorFromSlime(inspector), nodeTypeFromSlime(inspector.field("type"))); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 5813a7067cd..5393aa7cfb8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -17,6 +17,7 @@ import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import org.junit.Test; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -44,11 +45,13 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, hostResources); + tester.clock().advance(Duration.ofDays(1)); assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.addCpuMeasurements(0.25f, 1f, 59, application1); assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); + tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 60, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, @@ -58,6 +61,8 @@ public class AutoscalingTest { assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); + + tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.8f, 1f, 3, application1); assertTrue("Load change is large, but insufficient measurements for new config -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); @@ -112,6 +117,7 @@ public class AutoscalingTest { tester.nodeRepository().getNodes(application1).stream() .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow); + tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); // Changing min and max from slow to any ClusterResources min = new ClusterResources( 2, 1, @@ -184,7 +190,7 @@ public class AutoscalingTest { } @Test - public void test_autoscaling_limits_when_min_equals_xax() { + public void test_autoscaling_limits_when_min_equals_max() { NodeResources resources = new NodeResources(3, 100, 100, 1); ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); ClusterResources max = min; @@ -195,6 +201,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); + tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); assertTrue(tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); } @@ -283,6 +290,31 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2)); + tester.clock().advance(Duration.ofDays(1)); + tester.addMemMeasurements(0.02f, 0.95f, 120, application1); + tester.assertResources("Scaling down", + 6, 1, 2.8, 4.0, 95.0, + tester.autoscale(application1, cluster1.id(), min, max).target()); + } + + @Test + public void scaling_down_only_after_delay() { + NodeResources hostResources = new NodeResources(6, 100, 100, 1); + ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); + AutoscalingTester tester = new AutoscalingTester(hostResources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); + + tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2)); + + // No autoscaling as it is too soon to scale down after initial deploy (counting as a scaling event) + tester.addMemMeasurements(0.02f, 0.95f, 120, application1); + assertTrue(tester.autoscale(application1, cluster1.id(), min, max).target().isEmpty()); + + // Trying the same a day later causes autoscaling + tester.clock().advance(Duration.ofDays(1)); tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, @@ -344,6 +376,7 @@ public class AutoscalingTest { // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); + tester.clock().advance(Duration.ofDays(1)); tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 4d8b6d13a86..3faa4c244ee 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -20,6 +20,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; @@ -294,7 +295,7 @@ class AutoscalingTester { "hostname" + index, hostFlavor, Optional.empty(), - "nodename" + index, + List.of(new Address("nodename" + index)), resources, osVersion)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 5e318e00288..4b14174488e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -110,9 +110,8 @@ public class AutoscalingMaintainerTest { assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // Add measurement of the expected generation, leading to rescaling - tester.clock().advance(Duration.ofSeconds(1)); + tester.clock().advance(Duration.ofHours(2)); tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1); - //tester.clock().advance(Duration.ofSeconds(1)); Instant lastMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); @@ -122,10 +121,10 @@ public class AutoscalingMaintainerTest { @Test public void test_toString() { - assertEquals("4 * [vcpu: 1.0, memory: 2.0 Gb, disk 4.0 Gb] (total: [vcpu: 4.0, memory: 8.0 Gb, disk: 16.0 Gb])", + assertEquals("4 nodes with [vcpu: 1.0, memory: 2.0 Gb, disk 4.0 Gb, bandwidth: 1.0 Gbps] (total: [vcpu: 4.0, memory: 8.0 Gb, disk 16.0 Gb, bandwidth: 4.0 Gbps])", AutoscalingMaintainer.toString(new ClusterResources(4, 1, new NodeResources(1, 2, 4, 1)))); - assertEquals("4 (in 2 groups) * [vcpu: 1.0, memory: 2.0 Gb, disk 4.0 Gb] (total: [vcpu: 4.0, memory: 8.0 Gb, disk: 16.0 Gb])", + assertEquals("4 nodes (in 2 groups) with [vcpu: 1.0, memory: 2.0 Gb, disk 4.0 Gb, bandwidth: 1.0 Gbps] (total: [vcpu: 4.0, memory: 8.0 Gb, disk 16.0 Gb, bandwidth: 4.0 Gbps])", AutoscalingMaintainer.toString(new ClusterResources(4, 2, new NodeResources(1, 2, 4, 1)))); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 478376bc0cd..2833c4e11ba 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -20,6 +20,7 @@ import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.custom.HostCapacity; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.Generation; @@ -208,12 +209,12 @@ public class DynamicProvisioningMaintainerTest { tester.maintainer.maintain(); assertTrue("No IP addresses written as DNS updates are failing", - provisioning.get().stream().allMatch(host -> host.ipConfig().pool().isEmpty())); + provisioning.get().stream().allMatch(host -> host.ipConfig().pool().getIpSet().isEmpty())); tester.hostProvisioner.without(Behaviour.failDnsUpdate); tester.maintainer.maintain(); assertTrue("IP addresses written as DNS updates are succeeding", - provisioning.get().stream().noneMatch(host -> host.ipConfig().pool().isEmpty())); + provisioning.get().stream().noneMatch(host -> host.ipConfig().pool().getIpSet().isEmpty())); } private static class DynamicProvisioningTester { @@ -338,7 +339,7 @@ public class DynamicProvisioningMaintainerTest { "hostname" + index, hostFlavor, Optional.empty(), - "nodename" + index, + List.of(new Address("nodename" + index)), resources, osVersion)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index dbc0a98d879..a25858c034f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -3,14 +3,15 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterMembership; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; -import com.yahoo.test.ManualClock; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.applicationmodel.ApplicationInstance; @@ -46,6 +47,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.any; @@ -59,6 +61,8 @@ import static org.mockito.Mockito.when; */ public class MetricsReporterTest { + private static final Duration LONG_INTERVAL = Duration.ofDays(1); + private final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); private final ApplicationInstanceReference reference = mock(ApplicationInstanceReference.class); @@ -138,7 +142,7 @@ public class MetricsReporterTest { orchestrator, serviceMonitor, () -> 42, - Duration.ofMinutes(1)); + LONG_INTERVAL); metricsReporter.maintain(); // Verify sum of values across dimensions, and remove these metrics to avoid checking against @@ -222,7 +226,7 @@ public class MetricsReporterTest { orchestrator, serviceMonitor, () -> 42, - Duration.ofMinutes(1)); + LONG_INTERVAL); metricsReporter.maintain(); assertEquals(0, metric.values.get("hostedVespa.readyHosts")); // Only tenants counts @@ -247,6 +251,53 @@ public class MetricsReporterTest { assertEquals(2.0, metric.sumDoubleValues("hostedVespa.docker.allocatedCapacityCpu", app2context), 0.01d); } + @Test + public void non_active_metric() { + ProvisioningTester tester = new ProvisioningTester.Builder().build(); + tester.makeReadyHosts(5, new NodeResources(64, 256, 2000, 10)); + tester.activateTenantHosts(); + TestMetric metric = new TestMetric(); + MetricsReporter metricsReporter = new MetricsReporter(tester.nodeRepository(), + metric, + tester.orchestrator(), + serviceMonitor, + () -> 42, + LONG_INTERVAL); + + + // Application is deployed + ApplicationId application = ApplicationId.from("t1", "a1", "default"); + Map<String, String> dimensions = Map.of("applicationId", application.toFullString()); + NodeResources resources = new NodeResources(2, 8, 100, 1); + List<Node> activeNodes = tester.deploy(application, Capacity.from(new ClusterResources(4, 1, resources))); + metricsReporter.maintain(); + assertEquals(0D, getMetric("nodes.nonActiveFraction", metric, dimensions)); + assertEquals(4, getMetric("nodes.active", metric, dimensions)); + assertEquals(0, getMetric("nodes.nonActive", metric, dimensions)); + + // One node fails + tester.fail(activeNodes.get(0).hostname()); + metricsReporter.maintain(); + assertEquals(0.33D, getMetric("nodes.nonActiveFraction", metric, dimensions).doubleValue(), 0.005); + assertEquals(3, getMetric("nodes.active", metric, dimensions)); + assertEquals(1, getMetric("nodes.nonActive", metric, dimensions)); + + // Cluster is removed + tester.deactivate(application); + metricsReporter.maintain(); + assertEquals(1D, getMetric("nodes.nonActiveFraction", metric, dimensions).doubleValue(), Double.MIN_VALUE); + assertEquals(0, getMetric("nodes.active", metric, dimensions)); + assertEquals(3, getMetric("nodes.nonActive", metric, dimensions)); + } + + private Number getMetric(String name, TestMetric metric, Map<String, String> dimensions) { + List<TestMetric.TestContext> metrics = metric.context.get(name).stream() + .filter(ctx -> ctx.properties.entrySet().containsAll(dimensions.entrySet())) + .collect(Collectors.toList()); + if (metrics.isEmpty()) throw new IllegalArgumentException("No value found for metric " + name + " with dimensions " + dimensions); + return metrics.get(metrics.size() - 1).value; + } + private ApplicationId app(String tenant) { return new ApplicationId.Builder() .tenant(tenant) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/TestMetric.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/TestMetric.java index 09fb4d59443..b20524f678c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/TestMetric.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/TestMetric.java @@ -10,8 +10,8 @@ import java.util.Map; public class TestMetric implements Metric { - public Map<String, Number> values = new LinkedHashMap<>(); - public Map<String, List<Context>> context = new LinkedHashMap<>(); + public final Map<String, Number> values = new LinkedHashMap<>(); + public final Map<String, List<TestContext>> context = new LinkedHashMap<>(); @Override public void set(String key, Number val, Context ctx) { @@ -74,9 +74,9 @@ public class TestMetric implements Metric { /** * Context where the propertymap is not shared - but unique to each value. */ - private static class TestContext implements Context{ + static class TestContext implements Context{ Number value; - Map<String, ?> properties; + final Map<String, ?> properties; public TestContext(Map<String, ?> properties) { this.properties = properties; @@ -86,4 +86,5 @@ public class TestMetric implements Metric { this.value = value; } } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/IPTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/IPTest.java index fb9c1ad0e5a..8101405ad7f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/IPTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/IPTest.java @@ -86,8 +86,8 @@ public class IPTest { resolver.addReverseRecord("::2", "host1"); Optional<IP.Allocation> allocation = pool.findAllocation(emptyList, resolver); - assertEquals("::1", allocation.get().primary()); - assertFalse(allocation.get().secondary().isPresent()); + assertEquals(Optional.of("::1"), allocation.get().ipv6Address()); + assertFalse(allocation.get().ipv4Address().isPresent()); assertEquals("host3", allocation.get().hostname()); // Allocation fails if DNS record is missing @@ -105,16 +105,16 @@ public class IPTest { var pool = testPool(false); var allocation = pool.findAllocation(emptyList, resolver); assertFalse("Found allocation", allocation.isEmpty()); - assertEquals("127.0.0.1", allocation.get().primary()); - assertTrue("No secondary address", allocation.get().secondary().isEmpty()); + assertEquals(Optional.of("127.0.0.1"), allocation.get().ipv4Address()); + assertTrue("No IPv6 address", allocation.get().ipv6Address().isEmpty()); } @Test public void test_find_allocation_dual_stack() { IP.Pool pool = testPool(true); Optional<IP.Allocation> allocation = pool.findAllocation(emptyList, resolver); - assertEquals("::1", allocation.get().primary()); - assertEquals("127.0.0.2", allocation.get().secondary().get()); + assertEquals(Optional.of("::1"), allocation.get().ipv6Address()); + assertEquals("127.0.0.2", allocation.get().ipv4Address().get()); assertEquals("host3", allocation.get().hostname()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index 72f9e9597de..e63f31cf304 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -33,7 +33,8 @@ public class ApplicationSerializerTest { new ClusterResources(12, 6, new NodeResources(3, 6, 21, 24)), Optional.empty(), Optional.empty(), - List.of())); + List.of(), + "")); var minResources = new NodeResources(1, 2, 3, 4); clusters.add(new Cluster(ClusterSpec.Id.from("c2"), true, @@ -44,7 +45,8 @@ public class ApplicationSerializerTest { List.of(new ScalingEvent(new ClusterResources(10, 5, minResources), new ClusterResources(12, 6, minResources), 7L, - Instant.ofEpochMilli(12345L))))); + Instant.ofEpochMilli(12345L))), + "Autoscaling status")); Application original = new Application(ApplicationId.from("myTenant", "myApplication", "myInstance"), clusters); @@ -65,6 +67,7 @@ public class ApplicationSerializerTest { assertEquals(originalCluster.suggestedResources(), serializedCluster.suggestedResources()); assertEquals(originalCluster.targetResources(), serializedCluster.targetResources()); assertEquals(originalCluster.scalingEvents(), serializedCluster.scalingEvents()); + assertEquals(originalCluster.autoscalingStatus(), serializedCluster.autoscalingStatus()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java index d02244b7e11..9d390697df5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; import org.junit.Test; @@ -52,22 +51,22 @@ public class ContainerImagesTest { @Test public void image_replacement() { var flagSource = new InMemoryFlagSource(); - var defaultImage = DockerImage.fromString("foo.example.com/vespa/vespa") - .withReplacedBy(DockerImage.fromString("bar.example.com/vespa/vespa")); + var defaultImage = DockerImage.fromString("foo.example.com/vespa/vespa"); var tester = new ProvisioningTester.Builder().defaultImage(defaultImage).flagSource(flagSource).build(); var hosts = tester.makeReadyNodes(2, "default", NodeType.host); tester.activateTenantHosts(); - // Default image is used with flag disabled - flagSource.withBooleanFlag(Flags.REGIONAL_CONTAINER_REGISTRY.id(), false); + // Default image is used when there is no replacement for (var host : hosts) { assertEquals(defaultImage, tester.nodeRepository().containerImages().imageFor(host.type())); } - // Enabling flag switches to replacement - flagSource.withBooleanFlag(Flags.REGIONAL_CONTAINER_REGISTRY.id(), true); + // Replacement image is preferred + DockerImage imageWithReplacement = defaultImage.withReplacedBy(DockerImage.fromString("bar.example.com/vespa/vespa")); + tester = new ProvisioningTester.Builder().defaultImage(imageWithReplacement).flagSource(flagSource).build(); + hosts = tester.makeReadyNodes(2, "default", NodeType.host); for (var host : hosts) { - assertEquals(defaultImage.replacedBy().get().asString(), + assertEquals(imageWithReplacement.replacedBy().get().asString(), tester.nodeRepository().containerImages().imageFor(host.type()).asString()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java index 4917a59879f..919d02c435c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java @@ -20,6 +20,7 @@ import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; @@ -471,7 +472,7 @@ public class DynamicDockerProvisionTest { throw new OutOfCapacityException("No host flavor matches " + resources); return provisionIndexes.stream() .map(i -> new ProvisionedHost("id-" + i, "host-" + i, hostFlavor.get(), Optional.empty(), - "host-" + i + "-1", resources, osVersion)) + List.of(new Address("host-" + i + "-1")), resources, osVersion)) .collect(Collectors.toList()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java index c6e89680e85..808770f42dc 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java @@ -7,6 +7,7 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.IP; import org.junit.Before; import org.junit.Test; @@ -15,6 +16,8 @@ import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -32,8 +35,8 @@ public class HostCapacityTest { private HostCapacity capacity; private List<Node> nodes; private Node host1, host2, host3; - private final NodeResources resources1 = new NodeResources(1, 30, 20, 1.5); - private final NodeResources resources2 = new NodeResources(2, 40, 40, 0.5); + private final NodeResources dockerResources = new NodeResources(1, 30, 20, 1.5); + private final NodeResources docker2Resources = new NodeResources(2, 40, 40, 0.5); @Before public void setup() { @@ -48,15 +51,15 @@ public class HostCapacityTest { host3 = Node.create("host3", IP.Config.of(Set.of("::21"), generateIPs(22, 1), List.of()), "host3", nodeFlavors.getFlavorOrThrow("host"), NodeType.host).build(); // Add two containers to host1 - var nodeA = Node.createDockerNode(Set.of("::2"), "nodeA", "host1", resources1, NodeType.tenant).build(); - var nodeB = Node.createDockerNode(Set.of("::3"), "nodeB", "host1", resources1, NodeType.tenant).build(); + var nodeA = Node.createDockerNode(Set.of("::2"), "nodeA", "host1", dockerResources, NodeType.tenant).build(); + var nodeB = Node.createDockerNode(Set.of("::3"), "nodeB", "host1", dockerResources, NodeType.tenant).build(); // Add two containers to host 2 (same as host 1) - var nodeC = Node.createDockerNode(Set.of("::12"), "nodeC", "host2", resources1, NodeType.tenant).build(); - var nodeD = Node.createDockerNode(Set.of("::13"), "nodeD", "host2", resources1, NodeType.tenant).build(); + var nodeC = Node.createDockerNode(Set.of("::12"), "nodeC", "host2", dockerResources, NodeType.tenant).build(); + var nodeD = Node.createDockerNode(Set.of("::13"), "nodeD", "host2", dockerResources, NodeType.tenant).build(); // Add a larger container to host3 - var nodeE = Node.createDockerNode(Set.of("::22"), "nodeE", "host3", resources2, NodeType.tenant).build(); + var nodeE = Node.createDockerNode(Set.of("::22"), "nodeE", "host3", docker2Resources, NodeType.tenant).build(); // init docker host capacity nodes = new ArrayList<>(List.of(host1, host2, host3, nodeA, nodeB, nodeC, nodeD, nodeE)); @@ -65,19 +68,19 @@ public class HostCapacityTest { @Test public void hasCapacity() { - assertTrue(capacity.hasCapacity(host1, resources1)); - assertTrue(capacity.hasCapacity(host1, resources2)); - assertTrue(capacity.hasCapacity(host2, resources1)); - assertTrue(capacity.hasCapacity(host2, resources2)); - assertFalse(capacity.hasCapacity(host3, resources1)); // No ip available - assertFalse(capacity.hasCapacity(host3, resources2)); // No ip available + assertTrue(capacity.hasCapacity(host1, dockerResources)); + assertTrue(capacity.hasCapacity(host1, docker2Resources)); + assertTrue(capacity.hasCapacity(host2, dockerResources)); + assertTrue(capacity.hasCapacity(host2, docker2Resources)); + assertFalse(capacity.hasCapacity(host3, dockerResources)); // No ip available + assertFalse(capacity.hasCapacity(host3, docker2Resources)); // No ip available // Add a new node to host1 to deplete the memory resource - Node nodeF = Node.createDockerNode(Set.of("::6"), "nodeF", "host1", resources1, NodeType.tenant).build(); + Node nodeF = Node.createDockerNode(Set.of("::6"), "nodeF", "host1", dockerResources, NodeType.tenant).build(); nodes.add(nodeF); capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator); - assertFalse(capacity.hasCapacity(host1, resources1)); - assertFalse(capacity.hasCapacity(host1, resources2)); + assertFalse(capacity.hasCapacity(host1, dockerResources)); + assertFalse(capacity.hasCapacity(host1, docker2Resources)); } @Test @@ -112,19 +115,78 @@ public class HostCapacityTest { var nodeFlavors = FlavorConfigBuilder.createDummies("devhost", "container"); var devHost = Node.create("devhost", new IP.Config(Set.of("::1"), generateIPs(2, 10)), "devhost", nodeFlavors.getFlavorOrThrow("devhost"), NodeType.devhost).build(); - var cfg = Node.createDockerNode(Set.of("::2"), "cfg", "devhost", resources1, NodeType.config).build(); + var cfg = Node.createDockerNode(Set.of("::2"), "cfg", "devhost", dockerResources, NodeType.config).build(); var nodes = new ArrayList<>(List.of(cfg)); var capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator); - assertTrue(capacity.hasCapacity(devHost, resources1)); + assertTrue(capacity.hasCapacity(devHost, dockerResources)); - var container1 = Node.createDockerNode(Set.of("::3"), "container1", "devhost", resources1, NodeType.tenant).build(); + var container1 = Node.createDockerNode(Set.of("::3"), "container1", "devhost", dockerResources, NodeType.tenant).build(); nodes = new ArrayList<>(List.of(cfg, container1)); capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator); - assertFalse(capacity.hasCapacity(devHost, resources1)); + assertFalse(capacity.hasCapacity(devHost, dockerResources)); } + @Test + public void verifyCapacityFromAddresses() { + Node nodeA = Node.createDockerNode(Set.of("::2"), "nodeA", "host1", dockerResources, NodeType.tenant).build(); + Node nodeB = Node.createDockerNode(Set.of("::3"), "nodeB", "host1", dockerResources, NodeType.tenant).build(); + Node nodeC = Node.createDockerNode(Set.of("::4"), "nodeC", "host1", dockerResources, NodeType.tenant).build(); + + // host1 is a host with resources = 7-100-120-5 (7 vcpus, 100G memory, 120G disk, and 5Gbps), + // while nodeA-C have resources = dockerResources = 1-30-20-1.5 + + Node host1 = setupHostWithAdditionalHostnames("host1", "nodeA"); + // Allocating nodeA should be OK + assertTrue(hasCapacity(dockerResources, host1)); + // then, the second node lacks hostname address + assertFalse(hasCapacity(dockerResources, host1, nodeA)); + + host1 = setupHostWithAdditionalHostnames("host1", "nodeA", "nodeB"); + // Allocating nodeA and nodeB should be OK + assertTrue(hasCapacity(dockerResources, host1)); + assertTrue(hasCapacity(dockerResources, host1, nodeA)); + // but the third node lacks hostname address + assertFalse(hasCapacity(dockerResources, host1, nodeA, nodeB)); + + host1 = setupHostWithAdditionalHostnames("host1", "nodeA", "nodeB", "nodeC"); + // Allocating nodeA, nodeB, and nodeC should be OK + assertTrue(hasCapacity(dockerResources, host1)); + assertTrue(hasCapacity(dockerResources, host1, nodeA)); + assertTrue(hasCapacity(dockerResources, host1, nodeA, nodeB)); + // but the fourth node lacks hostname address + assertFalse(hasCapacity(dockerResources, host1, nodeA, nodeB, nodeC)); + + host1 = setupHostWithAdditionalHostnames("host1", "nodeA", "nodeB", "nodeC", "nodeD"); + // Allocating nodeA, nodeB, and nodeC should be OK + assertTrue(hasCapacity(dockerResources, host1)); + assertTrue(hasCapacity(dockerResources, host1, nodeA)); + assertTrue(hasCapacity(dockerResources, host1, nodeA, nodeB)); + // but the fourth lacks memory (host has 100G, while 4x30G = 120G + assertFalse(hasCapacity(dockerResources, host1, nodeA, nodeB, nodeC)); + } + + private Node setupHostWithAdditionalHostnames(String hostHostname, String... additionalHostnames) { + List<Address> addresses = Stream.of(additionalHostnames).map(Address::new).collect(Collectors.toList()); + + doAnswer(invocation -> ((Flavor)invocation.getArguments()[0]).resources()) + .when(hostResourcesCalculator).advertisedResourcesOf(any()); + + NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies( + "host", // 7-100-120-5 + "docker"); // 2- 40- 40-0.5 = docker2Resources + + return Node.create(hostHostname, IP.Config.of(Set.of("::1"), Set.of(), addresses), hostHostname, + nodeFlavors.getFlavorOrThrow("host"), NodeType.host).build(); + } + + private boolean hasCapacity(NodeResources requestedCapacity, Node host, Node... remainingNodes) { + List<Node> nodes = Stream.concat(Stream.of(host), Stream.of(remainingNodes)).collect(Collectors.toList()); + var capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator); + return capacity.hasCapacity(host, requestedCapacity); + } + private Set<String> generateIPs(int start, int count) { // Allow 4 containers Set<String> ipAddressPool = new LinkedHashSet<>(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index f012f0a428f..b2529963a9f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -314,8 +314,12 @@ public class ProvisioningTester { } public void fail(HostSpec host) { - int beforeFailCount = nodeRepository.getNode(host.hostname(), Node.State.active).get().status().failCount(); - Node failedNode = nodeRepository.fail(host.hostname(), Agent.system, "Failing to unit test"); + fail(host.hostname()); + } + + public void fail(String hostname) { + int beforeFailCount = nodeRepository.getNode(hostname, Node.State.active).get().status().failCount(); + Node failedNode = nodeRepository.fail(hostname, Agent.system, "Failing to unit test"); assertTrue(nodeRepository.getNodes(NodeType.tenant, Node.State.failed).contains(failedNode)); assertEquals(beforeFailCount + 1, failedNode.status().failCount()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index a98d383e219..86427fe30ae 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -91,8 +91,9 @@ public class NodesV2ApiTest { // POST new nodes assertResponse(new Request("http://localhost:8080/nodes/v2/node", ("[" + asNodeJson("host8.yahoo.com", "default", "127.0.8.1") + "," + // test with only 1 ip address - asHostJson("host9.yahoo.com", "large-variant", "127.0.9.1", "::9:1") + "," + - asNodeJson("parent2.yahoo.com", NodeType.host, "large-variant", Optional.of(TenantName.from("myTenant")), Optional.of(ApplicationId.from("tenant1", "app1", "instance1")), Optional.empty(), "127.0.127.1", "::127:1") + "," + + asHostJson("host9.yahoo.com", "large-variant", List.of("node9-1.yahoo.com"), "127.0.9.1", "::9:1") + "," + + asNodeJson("parent2.yahoo.com", NodeType.host, "large-variant", Optional.of(TenantName.from("myTenant")), + Optional.of(ApplicationId.from("tenant1", "app1", "instance1")), Optional.empty(), List.of(), "127.0.127.1", "::127:1") + "," + asDockerNodeJson("host11.yahoo.com", "parent.host.yahoo.com", "::11") + "]"). getBytes(StandardCharsets.UTF_8), Request.Method.POST), @@ -322,7 +323,7 @@ public class NodesV2ApiTest { // Attempt to POST host node with already assigned IP tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node", - "[" + asHostJson("host200.yahoo.com", "default", "127.0.2.1") + "]", + "[" + asHostJson("host200.yahoo.com", "default", List.of(), "127.0.2.1") + "]", Request.Method.POST), 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Cannot assign [127.0.2.1] to host200.yahoo.com: [127.0.2.1] already assigned to host2.yahoo.com\"}"); @@ -334,7 +335,7 @@ public class NodesV2ApiTest { // Node types running a single container can share their IP address with child node tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node", - "[" + asNodeJson("cfghost42.yahoo.com", NodeType.confighost, "default", Optional.empty(), Optional.empty(), Optional.empty(), "127.0.42.1") + "]", + "[" + asNodeJson("cfghost42.yahoo.com", NodeType.confighost, "default", Optional.empty(), Optional.empty(), Optional.empty(), List.of(), "127.0.42.1") + "]", Request.Method.POST), 200, "{\"message\":\"Added 1 nodes to the provisioned state\"}"); tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node", @@ -350,7 +351,7 @@ public class NodesV2ApiTest { // ... nor with child node on different host tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node", - "[" + asNodeJson("cfghost43.yahoo.com", NodeType.confighost, "default", Optional.empty(), Optional.empty(), Optional.empty(), "127.0.43.1") + "]", + "[" + asNodeJson("cfghost43.yahoo.com", NodeType.confighost, "default", Optional.empty(), Optional.empty(), Optional.empty(), List.of(), "127.0.43.1") + "]", Request.Method.POST), 200, "{\"message\":\"Added 1 nodes to the provisioned state\"}"); tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/cfg42.yahoo.com", @@ -392,7 +393,7 @@ public class NodesV2ApiTest { @Test public void fails_to_ready_node_with_hard_fail() throws Exception { assertResponse(new Request("http://localhost:8080/nodes/v2/node", - ("[" + asHostJson("host12.yahoo.com", "default") + "]"). + ("[" + asHostJson("host12.yahoo.com", "default", List.of()) + "]"). getBytes(StandardCharsets.UTF_8), Request.Method.POST), "{\"message\":\"Added 1 nodes to the provisioned state\"}"); @@ -961,7 +962,8 @@ public class NodesV2ApiTest { public void test_node_switch_hostname() throws Exception { String hostname = "host42.yahoo.com"; // Add host with switch hostname - String json = asNodeJson(hostname, NodeType.host, "default", Optional.empty(), Optional.empty(), Optional.of("switch0"), "127.0.42.1", "::42:1"); + String json = asNodeJson(hostname, NodeType.host, "default", Optional.empty(), Optional.empty(), + Optional.of("switch0"), List.of(), "127.0.42.1", "::42:1"); assertResponse(new Request("http://localhost:8080/nodes/v2/node", ("[" + json + "]").getBytes(StandardCharsets.UTF_8), Request.Method.POST), @@ -1013,17 +1015,22 @@ public class NodesV2ApiTest { "\"flavor\":\"" + flavor + "\"}"; } - private static String asHostJson(String hostname, String flavor, String... ipAddress) { - return asNodeJson(hostname, NodeType.host, flavor, Optional.empty(), Optional.empty(), Optional.empty(), ipAddress); + private static String asHostJson(String hostname, String flavor, List<String> additionalHostnames, String... ipAddress) { + return asNodeJson(hostname, NodeType.host, flavor, Optional.empty(), Optional.empty(), Optional.empty(), + additionalHostnames, ipAddress); } - private static String asNodeJson(String hostname, NodeType nodeType, String flavor, Optional<TenantName> reservedTo, Optional<ApplicationId> exclusiveTo, Optional<String> switchHostname, String... ipAddress) { + private static String asNodeJson(String hostname, NodeType nodeType, String flavor, Optional<TenantName> reservedTo, + Optional<ApplicationId> exclusiveTo, Optional<String> switchHostname, + List<String> additionalHostnames, String... ipAddress) { return "{\"hostname\":\"" + hostname + "\", \"openStackId\":\"" + hostname + "\"," + createIpAddresses(ipAddress) + "\"flavor\":\"" + flavor + "\"" + (reservedTo.map(tenantName -> ", \"reservedTo\":\"" + tenantName.value() + "\"").orElse("")) + (exclusiveTo.map(appId -> ", \"exclusiveTo\":\"" + appId.serializedForm() + "\"").orElse("")) + (switchHostname.map(s -> ", \"switchHostname\":\"" + s + "\"").orElse("")) + + (additionalHostnames.isEmpty() ? "" : ", \"additionalHostnames\":[\"" + + String.join("\",\"", additionalHostnames) + "\"]") + ", \"type\":\"" + nodeType + "\"}"; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index 456ed18334e..82f7e04f92b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -62,7 +62,37 @@ "diskSpeed" : "fast", "storageType" : "any" } - } + }, + "scalingEvents" : [ + { + "from": { + "nodes": 0, + "groups": 0, + "resources": { + "vcpu" : 0.0, + "memoryGb": 0.0, + "diskGb": 0.0, + "bandwidthGbps": 0.0, + "diskSpeed": "fast", + "storageType": "any" + } + }, + "to": { + "nodes": 2, + "groups": 1, + "resources" : { + "vcpu": 2.0, + "memoryGb": 8.0, + "diskGb": 50.0, + "bandwidthGbps": 1.0, + "diskSpeed": "fast", + "storageType": "local" + } + }, + "at" : 123 + } + ], + "autoscalingStatus" : "" } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json index bd22087ecfa..0ee590f60e0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json @@ -38,7 +38,37 @@ "diskSpeed": "fast", "storageType": "local" } - } + }, + "scalingEvents" : [ + { + "from": { + "nodes": 0, + "groups": 0, + "resources": { + "vcpu" : 0.0, + "memoryGb": 0.0, + "diskGb": 0.0, + "bandwidthGbps": 0.0, + "diskSpeed": "fast", + "storageType": "any" + } + }, + "to": { + "nodes": 2, + "groups": 1, + "resources" : { + "vcpu": 2.0, + "memoryGb": 8.0, + "diskGb": 50.0, + "bandwidthGbps": 1.0, + "diskSpeed": "fast", + "storageType": "local" + } + }, + "at" : 123 + } + ], + "autoscalingStatus" : "" } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json index dac9fd30267..809e58bd7b6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json @@ -25,5 +25,6 @@ "127.0.9.1", "::9:1" ], - "additionalIpAddresses": [] + "additionalIpAddresses": [], + "additionalHostnames": ["node9-1.yahoo.com"] } |