aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java23
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java30
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java15
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java28
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java10
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java63
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java13
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTargetTest.java75
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java14
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java72
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java38
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java8
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java64
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json2
24 files changed, 353 insertions, 185 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index a435814c21e..3f5255c6618 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -2,16 +2,11 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.vespa.hosted.provision.NodeList;
-import com.yahoo.vespa.hosted.provision.applications.Cluster;
import java.time.Duration;
-import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.function.Predicate;
-import java.util.stream.Collectors;
/**
* A list of metric snapshots from a cluster, sorted by increasing time (newest last).
@@ -85,8 +80,8 @@ public class ClusterTimeseries {
else
return 0.0; // ... because load is stable
}
- if (queryRateNow() == 0) return 0.1; // Growth not expressible as a fraction of the current rate
- return maxGrowthRate / queryRateNow();
+ if (currentQueryRate() == 0) return 0.1; // Growth not expressible as a fraction of the current rate
+ return maxGrowthRate / currentQueryRate();
}
/** The current query rate as a fraction of the peak rate in this timeseries */
@@ -97,12 +92,22 @@ public class ClusterTimeseries {
return snapshots.get(snapshots.size() - 1).queryRate() / max;
}
+ public double currentQueryRate() {
+ return queryRateAt(snapshots.size() - 1);
+ }
+
+ public double currentWriteRate() {
+ return writeRateAt(snapshots.size() - 1);
+ }
+
private double queryRateAt(int index) {
+ if (snapshots.isEmpty()) return 0.0;
return snapshots.get(index).queryRate();
}
- private double queryRateNow() {
- return queryRateAt(snapshots.size() - 1);
+ private double writeRateAt(int index) {
+ if (snapshots.isEmpty()) return 0.0;
+ return snapshots.get(index).writeRate();
}
private Duration durationBetween(int startIndex, int endIndex) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
index ab6a6d548e9..35717b97cf4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
@@ -75,6 +75,8 @@ public class ResourceTarget {
public static double idealCpuLoad(Duration scalingDuration,
ClusterTimeseries clusterTimeseries,
Application application) {
+ double queryCpuFraction = queryCpuFraction(clusterTimeseries);
+
// What's needed to have headroom for growth during scale-up as a fraction of current resources?
double maxGrowthRate = clusterTimeseries.maxQueryGrowthRate(); // in fraction per minute of the current traffic
double growthRateHeadroom = 1 + maxGrowthRate * scalingDuration.toMinutes();
@@ -84,18 +86,40 @@ public class ResourceTarget {
growthRateHeadroom = Math.min(growthRateHeadroom, 1 / fractionOfMax + 0.1);
// How much headroom is needed to handle sudden arrival of additional traffic due to another zone going down?
+ double maxTrafficShiftHeadroom = 10.0; // Cap to avoid extreme sizes from a current very small share
double trafficShiftHeadroom;
if (application.status().maxReadShare() == 0) // No traffic fraction data
trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic
+ else if (application.status().currentReadShare() == 0)
+ trafficShiftHeadroom = maxTrafficShiftHeadroom;
else
trafficShiftHeadroom = application.status().maxReadShare() / application.status().currentReadShare();
+ trafficShiftHeadroom = Math.min(trafficShiftHeadroom, maxTrafficShiftHeadroom);
- if (trafficShiftHeadroom > 2.0) // The expectation that we have almost no load with almost no queries is incorrect due
- trafficShiftHeadroom = 2.0; // to write traffic; once that is separated we can increase this threshold
+ // Assumptions: 1) Write load is not organic so we should not grow to handle more.
+ // (TODO: But allow applications to set their target write rate and size for that)
+ // 2) Write load does not change in BCP scenarios.
+ return queryCpuFraction * 1 / growthRateHeadroom * 1 / trafficShiftHeadroom * idealQueryCpuLoad() +
+ (1 - queryCpuFraction) * idealWriteCpuLoad();
+ }
+
+ private static double queryCpuFraction(ClusterTimeseries clusterTimeseries) {
+ double queryRate = clusterTimeseries.currentQueryRate();
+ double writeRate = clusterTimeseries.currentWriteRate();
+ if (queryRate == 0 && writeRate == 0) return queryCpuFraction(0.5);
+ return queryCpuFraction(queryRate / (queryRate + writeRate));
+ }
- return 1 / growthRateHeadroom * 1 / trafficShiftHeadroom * Resource.cpu.idealAverageLoad();
+ private static double queryCpuFraction(double queryFraction) {
+ double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure
+ double writeFraction = 1 - queryFraction;
+ return queryFraction * relativeQueryCost / (queryFraction * relativeQueryCost + writeFraction);
}
+ public static double idealQueryCpuLoad() { return Resource.cpu.idealAverageLoad(); }
+
+ public static double idealWriteCpuLoad() { return 0.95; }
+
public static double idealMemoryLoad() { return Resource.memory.idealAverageLoad(); }
public static double idealDiskLoad() { return Resource.disk.idealAverageLoad(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 9df6af4d02a..6ff4e1cc20d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -6,7 +6,6 @@ import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.jdisc.Metric;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -14,17 +13,15 @@ import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
-import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries;
import com.yahoo.vespa.hosted.provision.node.History;
import java.time.Duration;
import java.time.Instant;
-import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.stream.Collectors;
/**
* Maintainer making automatic scaling decisions
@@ -57,12 +54,12 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
boolean success = true;
if ( ! nodeRepository().zone().environment().isProduction()) return success;
- activeNodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes));
+ activeNodesByApplication().forEach(this::autoscale);
return success;
}
- private void autoscale(ApplicationId application, List<Node> applicationNodes) {
- nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, NodeList.copyOf(clusterNodes)));
+ private void autoscale(ApplicationId application, NodeList applicationNodes) {
+ nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes));
}
private void autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, NodeList clusterNodes) {
@@ -143,8 +140,8 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
return r + " (total: " + r.totalResources() + ")";
}
- private Map<ClusterSpec.Id, List<Node>> nodesByCluster(List<Node> applicationNodes) {
- return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster().id()));
+ private Map<ClusterSpec.Id, NodeList> nodesByCluster(NodeList applicationNodes) {
+ return applicationNodes.groupingBy(n -> n.allocation().get().membership().cluster().id());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index d0c02d7baaf..55548e70ddd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -90,13 +90,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
/** Resume provisioning of already provisioned hosts and their children */
private void resumeProvisioning(NodeList nodes, Mutex lock) {
- Map<String, Set<Node>> nodesByProvisionedParentHostname = nodes.nodeType(NodeType.tenant, NodeType.config).asList().stream()
- .filter(node -> node.parentHostname().isPresent())
- .collect(Collectors.groupingBy(
- node -> node.parentHostname().get(),
- Collectors.toSet()));
-
- nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost).forEach(host -> {
+ Map<String, Set<Node>> nodesByProvisionedParentHostname =
+ nodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller)
+ .asList()
+ .stream()
+ .filter(node -> node.parentHostname().isPresent())
+ .collect(Collectors.groupingBy(node -> node.parentHostname().get(), Collectors.toSet()));
+
+ nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost).forEach(host -> {
Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of());
try {
List<Node> updatedNodes = hostProvisioner.provision(host, children);
@@ -189,6 +190,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
// TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here
return node.state() != Node.State.parked || node.status().wantToDeprovision();
case confighost:
+ case controllerhost:
return node.state() == Node.State.parked && node.status().wantToDeprovision();
default:
return false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index e6338d73a17..025c8be449c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -7,13 +7,12 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Clock;
import java.time.Duration;
-import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
/**
* A maintainer is some job which runs at a fixed rate to perform some maintenance task on the node repo.
@@ -41,13 +40,12 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
protected Clock clock() { return nodeRepository.clock(); }
/** A utility to group active tenant nodes by application */
- protected Map<ApplicationId, List<Node>> activeNodesByApplication() {
- return nodeRepository().nodes().list(Node.State.active)
+ protected Map<ApplicationId, NodeList> activeNodesByApplication() {
+ return nodeRepository().nodes()
+ .list(Node.State.active)
.nodeType(NodeType.tenant)
- .asList()
- .stream()
- .filter(node -> ! node.allocation().get().owner().instance().isTester())
- .collect(Collectors.groupingBy(node -> node.allocation().get().owner()));
+ .matching(node -> ! node.allocation().get().owner().instance().isTester())
+ .groupingBy(node -> node.allocation().get().owner());
}
private static JobMetrics jobMetrics(Metric metric) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
index 49a33c4d120..f620a6d113d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
@@ -5,7 +5,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
-import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.History;
@@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.provision.node.History;
import java.time.Duration;
import java.time.Instant;
import java.util.LinkedHashSet;
-import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
@@ -38,15 +37,14 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer {
@Override
protected Set<ApplicationId> applicationsNeedingMaintenance() {
- Map<ApplicationId, List<Node>> nodesByApplication = nodeRepository().nodes().list()
- .nodeType(NodeType.tenant, NodeType.proxy).asList().stream()
- .filter(node -> node.allocation().isPresent())
- .collect(Collectors.groupingBy(node -> node.allocation().get().owner(), Collectors.toList()));
-
+ Map<ApplicationId, NodeList> nodesByApplication = nodeRepository().nodes().list()
+ .nodeType(NodeType.tenant, NodeType.proxy)
+ .matching(node -> node.allocation().isPresent())
+ .groupingBy(node -> node.allocation().get().owner());
return nodesByApplication.entrySet().stream()
- .filter(entry -> hasNodesWithChanges(entry.getKey(), entry.getValue()))
- .map(Map.Entry::getKey)
- .collect(Collectors.toCollection(LinkedHashSet::new));
+ .filter(entry -> hasNodesWithChanges(entry.getKey(), entry.getValue()))
+ .map(Map.Entry::getKey)
+ .collect(Collectors.toCollection(LinkedHashSet::new));
}
/**
@@ -61,15 +59,15 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer {
" as a manual change was made to its nodes");
}
- private boolean hasNodesWithChanges(ApplicationId applicationId, List<Node> nodes) {
+ private boolean hasNodesWithChanges(ApplicationId applicationId, NodeList nodes) {
Optional<Instant> lastDeployTime = deployer().lastDeployTime(applicationId);
if (lastDeployTime.isEmpty()) return false;
return nodes.stream()
- .flatMap(node -> node.history().events().stream())
- .filter(event -> event.agent() == Agent.operator)
- .map(History.Event::at)
- .anyMatch(e -> lastDeployTime.get().isBefore(e));
+ .flatMap(node -> node.history().events().stream())
+ .filter(event -> event.agent() == Agent.operator)
+ .map(History.Event::at)
+ .anyMatch(e -> lastDeployTime.get().isBefore(e));
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 1274e83fb3a..f72daf1bc2b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -50,15 +50,10 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
@Override
protected boolean maintain() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
-
- Map<ApplicationId, List<Node>> retiredNodesByApplication = activeNodes.stream()
- .filter(node -> node.allocation().isPresent())
- .filter(node -> node.allocation().get().membership().retired())
- .collect(Collectors.groupingBy(node -> node.allocation().get().owner()));
-
- for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) {
+ Map<ApplicationId, NodeList> retiredNodesByApplication = activeNodes.retired().groupingBy(node -> node.allocation().get().owner());
+ for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) {
ApplicationId application = entry.getKey();
- List<Node> retiredNodes = entry.getValue();
+ NodeList retiredNodes = entry.getValue();
List<Node> nodesToRemove = retiredNodes.stream().filter(n -> canRemove(n, activeNodes)).collect(Collectors.toList());
if (nodesToRemove.isEmpty()) continue;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index 3d6130c4116..e2b89879141 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -7,7 +7,6 @@ import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -17,10 +16,8 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
import java.time.Duration;
-import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.stream.Collectors;
/**
* Maintainer computing scaling suggestions for all clusters
@@ -49,10 +46,10 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
return successes > 0;
}
- private int suggest(ApplicationId application, List<Node> applicationNodes) {
+ private int suggest(ApplicationId application, NodeList applicationNodes) {
int successes = 0;
for (var cluster : nodesByCluster(applicationNodes).entrySet())
- successes += suggest(application, cluster.getKey(), NodeList.copyOf(cluster.getValue())) ? 1 : 0;
+ successes += suggest(application, cluster.getKey(), cluster.getValue()) ? 1 : 0;
return successes;
}
@@ -99,8 +96,8 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
return r1.totalResources().cost() > r2.totalResources().cost();
}
- private Map<ClusterSpec.Id, List<Node>> nodesByCluster(List<Node> applicationNodes) {
- return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster().id()));
+ private Map<ClusterSpec.Id, NodeList> nodesByCluster(NodeList applicationNodes) {
+ return applicationNodes.groupingBy(n -> n.allocation().get().membership().cluster().id());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index 3c936e4e6ba..49eb44a4ec0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@@ -100,13 +101,12 @@ class Activator {
Optional<Application> application = nodeRepository.applications().get(transaction.application());
if (application.isEmpty()) return; // infrastructure app, hopefully :-|
- var currentNodesByCluster = newNodes.stream()
- .collect(Collectors.groupingBy(node -> node.allocation().get().membership().cluster().id()));
+ Map<ClusterSpec.Id, NodeList> currentNodesByCluster = newNodes.groupingBy(node -> node.allocation().get().membership().cluster().id());
Application modified = application.get();
for (var clusterEntry : currentNodesByCluster.entrySet()) {
var cluster = modified.cluster(clusterEntry.getKey()).get();
var previousResources = oldNodes.cluster(clusterEntry.getKey()).toResources();
- var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources();
+ var currentResources = clusterEntry.getValue().toResources();
if ( ! previousResources.justNumbers().equals(currentResources.justNumbers())) {
cluster = cluster.with(ScalingEvent.create(previousResources, currentResources, generation, at));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 597c4c1bd8c..0f725e6447a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -92,7 +92,7 @@ public class GroupPreparer {
allocateOsRequirement);
NodeType hostType = allocation.nodeType().hostType();
boolean hostTypeSupportsDynamicProvisioning = hostType == NodeType.host ||
- (hostType == NodeType.confighost &&
+ (hostType.isConfigServerHostLike() &&
provisionConfigServerDynamically.value());
if (nodeRepository.zone().getCloud().dynamicProvisioning() && hostTypeSupportsDynamicProvisioning) {
final Version osVersion;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java
index b1bba656dc8..499eb3f23c0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java
@@ -82,7 +82,7 @@ public class LoadBalancerProvisioner {
if (application.instance().isTester()) return; // Do not provision for tester instances
try (var lock = db.lock(application)) {
ClusterSpec.Id clusterId = effectiveId(cluster);
- List<Node> nodes = nodesOf(clusterId, application);
+ NodeList nodes = nodesOf(clusterId, application);
LoadBalancerId loadBalancerId = requireNonClashing(new LoadBalancerId(application, clusterId));
ApplicationTransaction transaction = new ApplicationTransaction(new ProvisionLock(application, lock), new NestedTransaction());
provision(transaction, loadBalancerId, nodes, false);
@@ -167,7 +167,7 @@ public class LoadBalancerProvisioner {
}
/** Idempotently provision a load balancer for given application and cluster */
- private void provision(ApplicationTransaction transaction, LoadBalancerId id, List<Node> nodes, boolean activate) {
+ private void provision(ApplicationTransaction transaction, LoadBalancerId id, NodeList nodes, boolean activate) {
Instant now = nodeRepository.clock().instant();
Optional<LoadBalancer> loadBalancer = db.readLoadBalancer(id);
if (loadBalancer.isEmpty() && activate) return; // Nothing to activate as this load balancer was never prepared
@@ -185,7 +185,7 @@ public class LoadBalancerProvisioner {
db.writeLoadBalancers(List.of(newLoadBalancer), transaction.nested());
}
- private void provision(ApplicationTransaction transaction, ClusterSpec.Id clusterId, List<Node> nodes) {
+ private void provision(ApplicationTransaction transaction, ClusterSpec.Id clusterId, NodeList nodes) {
provision(transaction, new LoadBalancerId(transaction.application(), clusterId), nodes, true);
}
@@ -204,12 +204,12 @@ public class LoadBalancerProvisioner {
}
/** Returns the nodes allocated to the given load balanced cluster */
- private List<Node> nodesOf(ClusterSpec.Id loadBalancedCluster, ApplicationId application) {
- return loadBalancedClustersOf(application).getOrDefault(loadBalancedCluster, List.of());
+ private NodeList nodesOf(ClusterSpec.Id loadBalancedCluster, ApplicationId application) {
+ return loadBalancedClustersOf(application).getOrDefault(loadBalancedCluster, NodeList.copyOf(List.of()));
}
/** Returns the load balanced clusters of given application and their nodes */
- private Map<ClusterSpec.Id, List<Node>> loadBalancedClustersOf(ApplicationId application) {
+ private Map<ClusterSpec.Id, NodeList> loadBalancedClustersOf(ApplicationId application) {
NodeList nodes = nodeRepository.nodes().list(Node.State.reserved, Node.State.active).owner(application);
if (nodes.stream().anyMatch(node -> node.type() == NodeType.config)) {
nodes = nodes.nodeType(NodeType.config).type(ClusterSpec.Type.admin);
@@ -218,11 +218,11 @@ public class LoadBalancerProvisioner {
} else {
nodes = nodes.nodeType(NodeType.tenant).container();
}
- return nodes.stream().collect(Collectors.groupingBy(node -> effectiveId(node.allocation().get().membership().cluster())));
+ return nodes.groupingBy(node -> effectiveId(node.allocation().get().membership().cluster()));
}
/** Returns real servers for given nodes */
- private Set<Real> realsOf(List<Node> nodes) {
+ private Set<Real> realsOf(NodeList nodes) {
var reals = new LinkedHashSet<Real>();
for (var node : nodes) {
for (var ip : reachableIpAddresses(node)) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index 19c8d68963a..cd5355befbe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -296,8 +296,8 @@ class NodeAllocation {
* flavor and host count required to cover the deficit.
*/
Optional<HostDeficit> hostDeficit() {
- if (nodeType() != NodeType.config && nodeType() != NodeType.tenant) {
- return Optional.empty(); // Requests for these node types never have a deficit
+ if (nodeType().isHost()) {
+ return Optional.empty(); // Hosts are provisioned as required by the child application
}
return Optional.of(new HostDeficit(requestedNodes.resources().orElseGet(NodeResources::unspecified),
requestedNodes.fulfilledDeficitCount(accepted())))
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index 3ff4765dd00..482f0f2e011 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -179,7 +179,9 @@ public interface NodeSpec {
/** A node spec specifying a node type. This will accept all nodes of this type. */
class TypeNodeSpec implements NodeSpec {
- private static final Map<NodeType, Integer> WANTED_NODE_COUNT = Map.of(NodeType.config, 3);
+ private static final Map<NodeType, Integer> WANTED_NODE_COUNT = Map.of(
+ NodeType.config, 3,
+ NodeType.controller, 3);
private final NodeType type;
@@ -207,10 +209,8 @@ public interface NodeSpec {
@Override
public int idealRetiredCount(int acceptedCount, int currentRetiredCount) {
- // All nodes marked with wantToRetire get marked as retired just before this function is called,
- // the job of this function is to throttle the retired count. If no nodes are marked as retired
- // then continue this way, otherwise allow only 1 node to be retired
- return Math.min(1, currentRetiredCount);
+ // All nodes marked with wantToRetire get marked as retired just before this function is called
+ return currentRetiredCount;
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index baf7d2dbe15..650bfe761b5 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -15,7 +15,6 @@ import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
-import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import org.junit.Test;
@@ -54,6 +53,7 @@ public class AutoscalingTest {
assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty());
tester.clock().advance(Duration.ofDays(1));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
14, 1, 1.4, 30.8, 30.8,
@@ -93,6 +93,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, 1, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high",
@@ -122,6 +123,7 @@ public class AutoscalingTest {
.allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow);
tester.clock().advance(Duration.ofDays(2));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
// Changing min and max from slow to any
ClusterResources min = new ClusterResources( 2, 1,
@@ -181,6 +183,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, 1,
new NodeResources(1.9, 70, 70, 1));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1);
tester.assertResources("Scaling up to limit since resource usage is too high",
6, 1, 2.4, 78.0, 79.0,
@@ -217,6 +220,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, 5, new NodeResources(3.0, 10, 10, 1));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements( 0.3f, 1f, 240, application1);
tester.assertResources("Scaling up since resource usage is too high",
6, 6, 3.6, 8.0, 10.0,
@@ -252,6 +256,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, 1, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high",
7, 1, 2.5, 80.0, 80.0,
@@ -304,6 +309,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, 5, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high",
7, 7, 2.5, 80.0, 80.0,
@@ -322,6 +328,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 6, 2, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper",
8, 1, 2.7, 83.3, 83.3,
@@ -341,6 +348,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 6, 2, new NodeResources(10, 100, 100, 1));
tester.clock().advance(Duration.ofDays(1));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMemMeasurements(1.0f, 1f, 1000, application1);
tester.assertResources("Increase group size to reduce memory load",
8, 2, 13.6, 89.3, 62.5,
@@ -360,6 +368,7 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2));
tester.clock().advance(Duration.ofDays(2));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMemMeasurements(0.02f, 0.95f, 120, application1);
tester.assertResources("Scaling down",
6, 1, 2.9, 4.0, 95.0,
@@ -377,6 +386,7 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1");
tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
// No autoscaling as it is too soon to scale down after initial deploy (counting as a scaling event)
tester.addMemMeasurements(0.02f, 0.95f, 120, application1);
@@ -391,7 +401,7 @@ public class AutoscalingTest {
}
@Test
- public void real_resources_are_taken_into_account() {
+ public void test_autoscaling_considers_real_resources() {
NodeResources hostResources = new NodeResources(60, 100, 1000, 10);
ClusterResources min = new ClusterResources(2, 1, new NodeResources( 2, 20, 200, 1));
ClusterResources max = new ClusterResources(4, 1, new NodeResources(60, 100, 1000, 1));
@@ -403,6 +413,7 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1");
tester.deploy(application1, cluster1, min);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1);
tester.assertResources("Scaling up",
4, 1, 7.4, 20, 200,
@@ -416,6 +427,7 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1");
tester.deploy(application1, cluster1, min);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1);
tester.assertResources("Scaling up",
4, 1, 7.4, 34, 200,
@@ -455,6 +467,7 @@ public class AutoscalingTest {
tester.deactivateRetired(application1, cluster1, scaledResources);
tester.clock().advance(Duration.ofDays(2));
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addMemMeasurements(0.3f, 0.6f, 1000, application1);
tester.assertResources("Scaling down since resource usage has gone down",
6, 1, 3, 83, 28.8,
@@ -472,6 +485,7 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
tester.deploy(application1, cluster1, 5, 1, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
// (no read share stored)
@@ -502,6 +516,7 @@ public class AutoscalingTest {
ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
tester.deploy(application1, cluster1, 5, 1, resources);
+ tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
tester.addCpuMeasurements(0.25f, 1f, 120, application1);
// (no query rate data)
@@ -529,6 +544,50 @@ public class AutoscalingTest {
}
@Test
+ public void test_autoscaling_considers_query_vs_write_rate() {
+ NodeResources minResources = new NodeResources( 1, 100, 100, 1);
+ NodeResources midResources = new NodeResources( 5, 100, 100, 1);
+ NodeResources maxResources = new NodeResources(10, 100, 100, 1);
+ ClusterResources min = new ClusterResources(5, 1, minResources);
+ ClusterResources max = new ClusterResources(5, 1, maxResources);
+ AutoscalingTester tester = new AutoscalingTester(maxResources.withVcpu(maxResources.vcpu() * 2));
+
+ ApplicationId application1 = tester.applicationId("application1");
+ ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+ tester.deploy(application1, cluster1, 5, 1, midResources);
+ tester.addCpuMeasurements(0.4f, 1f, 120, application1);
+
+ // Why twice the query rate at time = 0?
+ // This creates headroom for query traffic doubling, whose effect we want to observe here.
+
+ tester.addLoadMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
+ tester.assertResources("Query and write load is equal -> scale up somewhat",
+ 5, 1, 7.3, 100, 100,
+ tester.autoscale(application1, cluster1.id(), min, max).target());
+
+ tester.addLoadMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 100.0 : 50.0, t -> 10.0);
+ tester.assertResources("Query load is 5x write load -> scale up more",
+ 5, 1, 9.7, 100, 100,
+ tester.autoscale(application1, cluster1.id(), min, max).target());
+
+ tester.addLoadMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
+ tester.assertResources("Write load is 10x query load -> scale down",
+ 5, 1, 3.8, 100, 100,
+ tester.autoscale(application1, cluster1.id(), min, max).target());
+
+ tester.addLoadMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
+ tester.assertResources("Query only -> largest possible",
+ 5, 1, 10.0, 100, 100,
+ tester.autoscale(application1, cluster1.id(), min, max).target());
+
+ tester.addLoadMeasurements(application1, cluster1.id(), 10, t -> 0.0, t -> 10.0);
+ tester.assertResources("Write only -> smallest possible",
+ 5, 1, 2.1, 100, 100,
+ tester.autoscale(application1, cluster1.id(), min, max).target());
+ }
+
+ @Test
public void test_cd_autoscaling_test() {
NodeResources resources = new NodeResources(1, 4, 50, 1);
ClusterResources min = new ClusterResources( 2, 1, resources);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 1949a6116d8..e24146d4752 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -236,6 +236,19 @@ class AutoscalingTester {
}
/** Creates the given number of measurements, spaced 5 minutes between, using the given function */
+ public void addLoadMeasurements(ApplicationId application,
+ ClusterSpec.Id cluster,
+ int measurements,
+ IntFunction<Double> queryRate,
+ IntFunction<Double> writeRate) {
+ Instant time = clock().instant();
+ for (int i = 0; i < measurements; i++) {
+ db.addClusterMetrics(application, Map.of(cluster, new ClusterMetricSnapshot(time, queryRate.apply(i), writeRate.apply(i))));
+ time = time.plus(Duration.ofMinutes(5));
+ }
+ }
+
+ /** Creates the given number of measurements, spaced 5 minutes apart, using the given function */
public void addQueryRateMeasurements(ApplicationId application,
ClusterSpec.Id cluster,
int measurements,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTargetTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTargetTest.java
new file mode 100644
index 00000000000..f616e3e8b9d
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTargetTest.java
@@ -0,0 +1,75 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.hosted.provision.applications.Application;
+import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.applications.Status;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.IntFunction;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author bratseth
+ */
+public class ResourceTargetTest {
+
+ private static final double delta = 0.001;
+
+ @Test
+ public void test_traffic_headroom() {
+ Application application = Application.empty(ApplicationId.from("t1", "a1", "i1"));
+ Cluster cluster = new Cluster(ClusterSpec.Id.from("test"),
+ false,
+ new ClusterResources(5, 1, new NodeResources(1, 10, 100, 1)),
+ new ClusterResources(5, 1, new NodeResources(1, 10, 100, 1)),
+ Optional.empty(),
+ Optional.empty(),
+ List.of(),
+ "");
+ application = application.with(cluster);
+
+ // No current traffic: Ideal load is low but capped
+ application = application.with(new Status(0.0, 1.0));
+ assertEquals(0.131,
+ ResourceTarget.idealCpuLoad(Duration.ofMinutes(10),
+ new ClusterTimeseries(cluster.id(),
+ loadSnapshots(100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0)),
+ application),
+ delta);
+
+ // Almost current traffic: Ideal load is low but capped
+ application = application.with(new Status(0.0001, 1.0));
+ assertEquals(0.131,
+ ResourceTarget.idealCpuLoad(Duration.ofMinutes(10),
+ new ClusterTimeseries(cluster.id(),
+ loadSnapshots(100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0)),
+ application),
+ delta);
+ }
+
+
+ /** Creates the given number of measurements, spaced 5 minutes apart, using the given function */
+ private List<ClusterMetricSnapshot> loadSnapshots(int measurements,
+ IntFunction<Double> queryRate,
+ IntFunction<Double> writeRate) {
+ List<ClusterMetricSnapshot> snapshots = new ArrayList<>(measurements);
+ ManualClock clock = new ManualClock();
+ for (int i = 0; i < measurements; i++) {
+ snapshots.add(new ClusterMetricSnapshot(clock.instant(), queryRate.apply(i), writeRate.apply(i)));
+ clock.advance(Duration.ofMinutes(5));
+ }
+ return snapshots;
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index 0c1a59c883d..f292ab8ccf1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -152,6 +152,7 @@ public class AutoscalingMaintainerTest {
// deploy
tester.deploy(app1, cluster1, app1Capacity);
+ tester.addQueryRateMeasurements(app1, cluster1.id(), 12, t -> t == 0 ? 20.0 : 10.0);
for (int i = 0; i < 20; i++) {
// Record completion to keep scaling window at minimum
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
index e8cfe6a2310..755f7608cd9 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
+import com.yahoo.vespa.hosted.provision.autoscale.ClusterMetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
@@ -27,6 +28,7 @@ import java.time.Instant;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.function.IntFunction;
import java.util.stream.Collectors;
/**
@@ -85,6 +87,18 @@ public class AutoscalingMaintainerTester {
}
}
+ /** Creates the given number of measurements, spaced 5 minutes apart, using the given function */
+ public void addQueryRateMeasurements(ApplicationId application,
+ ClusterSpec.Id cluster,
+ int measurements,
+ IntFunction<Double> queryRate) {
+ Instant time = clock().instant();
+ for (int i = 0; i < measurements; i++) {
+ metricsDb.addClusterMetrics(application, Map.of(cluster, new ClusterMetricSnapshot(time, queryRate.apply(i), 0.0)));
+ time = time.plus(Duration.ofMinutes(5));
+ }
+ }
+
public Cluster cluster(ApplicationId application, ClusterSpec cluster) {
return nodeRepository().applications().get(application).get().cluster(cluster.id()).get();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 076a0e24620..48a6e03f646 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -36,6 +36,8 @@ import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import com.yahoo.vespa.service.duper.ConfigServerApplication;
import com.yahoo.vespa.service.duper.ConfigServerHostApplication;
+import com.yahoo.vespa.service.duper.ControllerApplication;
+import com.yahoo.vespa.service.duper.ControllerHostApplication;
import org.junit.Test;
import java.time.Duration;
@@ -421,6 +423,30 @@ public class DynamicProvisioningMaintainerTest {
@Test
public void replace_config_server() {
+ replace_config_server_like(NodeType.confighost);
+ }
+
+ @Test
+ public void replace_controller() {
+ replace_config_server_like(NodeType.controllerhost);
+ }
+
+ public void replace_config_server_like(NodeType hostType) {
+ final ApplicationId hostApp;
+ final ApplicationId configSrvApp;
+ switch (hostType) {
+ case confighost:
+ hostApp = new ConfigServerHostApplication().getApplicationId();
+ configSrvApp = new ConfigServerApplication().getApplicationId();
+ break;
+ case controllerhost:
+ hostApp = new ControllerHostApplication().getApplicationId();
+ configSrvApp = new ControllerApplication().getApplicationId();
+ break;
+ default:
+ throw new IllegalArgumentException("Unexpected config server host like node type: " + hostType);
+ }
+
Cloud cloud = Cloud.builder().dynamicProvisioning(true).build();
DynamicProvisioningTester dynamicProvisioningTester = new DynamicProvisioningTester(cloud, new MockNameResolver().mockAnyLookup());
ProvisioningTester tester = dynamicProvisioningTester.provisioningTester;
@@ -428,24 +454,22 @@ public class DynamicProvisioningMaintainerTest {
dynamicProvisioningTester.flagSource.withBooleanFlag(Flags.DYNAMIC_CONFIG_SERVER_PROVISIONING.id(), true);
// Initial config server hosts are provisioned manually
- ApplicationId hostApp = ApplicationId.from("hosted-vespa", "configserver-host", "default");
- List<Node> provisionedHosts = tester.makeReadyNodes(3, "default", NodeType.confighost).stream()
+ List<Node> provisionedHosts = tester.makeReadyNodes(3, "default", hostType).stream()
.sorted(Comparator.comparing(Node::hostname))
.collect(Collectors.toList());
- tester.prepareAndActivateInfraApplication(hostApp, NodeType.confighost);
+ tester.prepareAndActivateInfraApplication(hostApp, hostType);
// Provision config servers
- ApplicationId configSrvApp = ApplicationId.from("hosted-vespa", "zone-config-servers", "default");
for (int i = 0; i < provisionedHosts.size(); i++) {
- tester.makeReadyChildren(1, i + 1, NodeResources.unspecified(), NodeType.config,
- provisionedHosts.get(i).hostname(), (nodeIndex) -> "cfg" + nodeIndex);
+ tester.makeReadyChildren(1, i + 1, NodeResources.unspecified(), hostType.childNodeType(),
+ provisionedHosts.get(i).hostname(), (nodeIndex) -> "cfg" + nodeIndex);
}
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
// Expected number of hosts and children are provisioned
NodeList allNodes = tester.nodeRepository().nodes().list();
- NodeList configHosts = allNodes.nodeType(NodeType.confighost);
- NodeList configNodes = allNodes.nodeType(NodeType.config);
+ NodeList configHosts = allNodes.nodeType(hostType);
+ NodeList configNodes = allNodes.nodeType(hostType.childNodeType());
assertEquals(3, configHosts.size());
assertEquals(3, configNodes.size());
String hostnameToRemove = provisionedHosts.get(1).hostname();
@@ -456,20 +480,20 @@ public class DynamicProvisioningMaintainerTest {
tester.nodeRepository().nodes().deprovision(hostToRemove.get(), Agent.system, tester.clock().instant());
// Redeployment of config server application retires node
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
assertTrue("Redeployment retires node", nodeToRemove.get().allocation().get().membership().retired());
// Config server becomes removable (done by RetiredExpirer in a real system) and redeployment moves it
// to inactive
tester.nodeRepository().nodes().setRemovable(configSrvApp, List.of(nodeToRemove.get()));
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
assertEquals("Node moves to inactive", Node.State.inactive, nodeToRemove.get().state());
// Node is completely removed (done by InactiveExpirer and host-admin in a real system)
Node inactiveConfigServer = nodeToRemove.get();
int removedIndex = inactiveConfigServer.allocation().get().membership().index();
tester.nodeRepository().nodes().removeRecursively(inactiveConfigServer, true);
- assertEquals(2, tester.nodeRepository().nodes().list().nodeType(NodeType.config).size());
+ assertEquals(2, tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType()).size());
// ExpiredRetirer moves host to inactive after child has moved to parked
tester.nodeRepository().nodes().deallocate(hostToRemove.get(), Agent.system, getClass().getSimpleName());
@@ -477,38 +501,38 @@ public class DynamicProvisioningMaintainerTest {
// Host is removed
dynamicProvisioningTester.maintainer.maintain();
- assertEquals(2, tester.nodeRepository().nodes().list().nodeType(NodeType.confighost).size());
+ assertEquals(2, tester.nodeRepository().nodes().list().nodeType(hostType).size());
// Deployment by the removed host has no effect
HostName.setHostNameForTestingOnly("cfg2.example.com");
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
assertEquals(List.of(), dynamicProvisioningTester.hostProvisioner.provisionedHosts());
// Deployment on another config server starts provisioning a new host and child
HostName.setHostNameForTestingOnly("cfg3.example.com");
- assertEquals(0, tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(NodeType.config).size());
- assertEquals(2, tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config).size());
- assertEquals(1, tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(NodeType.config).size());
- Node newNode = tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(NodeType.config).first().get();
+ assertEquals(0, tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(hostType.childNodeType()).size());
+ assertEquals(2, tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()).size());
+ assertEquals(1, tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(hostType.childNodeType()).size());
+ Node newNode = tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(hostType.childNodeType()).first().get();
// Resume provisioning and activate host
dynamicProvisioningTester.maintainer.maintain();
List<ProvisionedHost> newHosts = dynamicProvisioningTester.hostProvisioner.provisionedHosts();
assertEquals(1, newHosts.size());
tester.nodeRepository().nodes().setReady(newHosts.get(0).hostHostname(), Agent.operator, getClass().getSimpleName());
- tester.prepareAndActivateInfraApplication(hostApp, NodeType.confighost);
- assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.confighost).size());
+ tester.prepareAndActivateInfraApplication(hostApp, hostType);
+ assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(hostType).size());
// Redeployment of config server app actives new node
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
newNode = tester.nodeRepository().nodes().node(newNode.hostname()).get();
assertSame(Node.State.active, newNode.state());
assertEquals("Removed index is reused", removedIndex, newNode.allocation().get().membership().index());
// Next redeployment does nothing
- NodeList nodesBefore = tester.nodeRepository().nodes().list().nodeType(NodeType.config);
- tester.prepareAndActivateInfraApplication(configSrvApp, NodeType.config);
- NodeList nodesAfter = tester.nodeRepository().nodes().list().nodeType(NodeType.config);
+ NodeList nodesBefore = tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType());
+ tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
+ NodeList nodesAfter = tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType());
assertEquals(nodesBefore, nodesAfter);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
index 718facd477c..924d38cc6c2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
@@ -42,7 +42,6 @@ import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
@@ -191,25 +190,20 @@ public class RetiredExpirerTest {
// Redeploy to retire all 3 config servers
infraDeployer.activateAllSupportedInfraApplications(true);
+ List<Node> retiredNodes = tester.nodeRepository().nodes().list().retired().asList();
+ assertEquals(3, retiredNodes.size());
- // Only 1 config server is allowed to retire at any given point in time
- List<Node> retiredNodes = tester.nodeRepository().nodes().list(() -> {}).stream()
- .filter(node -> node.allocation().map(allocation -> allocation.membership().retired()).orElse(false))
- .collect(Collectors.toList());
- assertEquals(1, retiredNodes.size());
- Node retiredNode = retiredNodes.get(0);
- String retiredNodeHostname = retiredNode.hostname();
-
- // Allow retiredNodeHostname to be removed
+ // The Orchestrator will allow only 1 to be removed, say cfg1
+ Node retiredNode = tester.nodeRepository().nodes().node(cfg1.s()).orElseThrow();
doThrow(new OrchestrationException("denied")).when(orchestrator).acquirePermissionToRemove(any());
- doNothing().when(orchestrator).acquirePermissionToRemove(eq(new HostName(retiredNodeHostname)));
+ doNothing().when(orchestrator).acquirePermissionToRemove(eq(new HostName(retiredNode.hostname())));
// RetiredExpirer should remove cfg1 from application
RetiredExpirer retiredExpirer = createRetiredExpirer(deployer);
retiredExpirer.run();
var activeConfigServerHostnames = new HashSet<>(Set.of("cfg1", "cfg2", "cfg3"));
- assertTrue(activeConfigServerHostnames.contains(retiredNodeHostname));
- activeConfigServerHostnames.remove(retiredNodeHostname);
+ assertTrue(activeConfigServerHostnames.contains(retiredNode.hostname()));
+ activeConfigServerHostnames.remove(retiredNode.hostname());
assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel));
assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size());
assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size());
@@ -234,8 +228,8 @@ public class RetiredExpirerTest {
// Provision and ready new config server
MockNameResolver nameResolver = (MockNameResolver)tester.nodeRepository().nameResolver();
String ipv4 = "127.0.1.4";
- nameResolver.addRecord(retiredNodeHostname, ipv4);
- Node node = Node.create(retiredNodeHostname, new IP.Config(Set.of(ipv4), Set.of()), retiredNodeHostname,
+ nameResolver.addRecord(retiredNode.hostname(), ipv4);
+ Node node = Node.create(retiredNode.hostname(), new IP.Config(Set.of(ipv4), Set.of()), retiredNode.hostname(),
tester.asFlavor("default", NodeType.config), NodeType.config).build();
var nodes = List.of(node);
nodes = nodeRepository.nodes().addNodes(nodes, Agent.system);
@@ -252,14 +246,16 @@ public class RetiredExpirerTest {
infraDeployer.activateAllSupportedInfraApplications(true);
assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size());
- // Another config server should now have retired
+ // There are now 2 retired config servers left
retiredExpirer.run();
assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size());
- var retiredNodes2 = tester.nodeRepository().nodes().list(() -> {}).stream()
- .filter(n -> n.allocation().map(allocation -> allocation.membership().retired()).orElse(false))
- .collect(Collectors.toList());
- assertEquals(1, retiredNodes2.size());
- assertNotEquals(retiredNodeHostname, retiredNodes2.get(0));
+ var retiredHostnames = tester.nodeRepository()
+ .nodes().list(() -> {})
+ .stream()
+ .filter(n -> n.allocation().map(allocation -> allocation.membership().retired()).orElse(false))
+ .map(Node::hostname)
+ .collect(Collectors.toSet());
+ assertEquals(Set.of("cfg2", "cfg3"), retiredHostnames);
}
private Set<String> configServerHostnames(MockDuperModel duperModel) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
index 88d39e887d3..9ae67cef235 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
@@ -72,9 +72,9 @@ public class ScalingSuggestionsMaintainerTest {
new TestMetric());
maintainer.maintain();
- assertEquals("14 nodes with [vcpu: 6.9, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]",
+ assertEquals("12 nodes with [vcpu: 6.0, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
- assertEquals("9 nodes with [vcpu: 13.8, memory: 4.0 Gb, disk 10.3 Gb, bandwidth: 0.1 Gbps]",
+ assertEquals("8 nodes with [vcpu: 11.0, memory: 4.0 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps]",
suggestionOf(app2, cluster2, tester).get().resources().toString());
// Utilization goes way down
@@ -82,14 +82,14 @@ public class ScalingSuggestionsMaintainerTest {
addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository(), metricsDb);
maintainer.maintain();
assertEquals("Suggestion stays at the peak value observed",
- "14 nodes with [vcpu: 6.9, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]",
+ "12 nodes with [vcpu: 6.0, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
// Utilization is still way down and a week has passed
tester.clock().advance(Duration.ofDays(7));
addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository(), metricsDb);
maintainer.maintain();
assertEquals("Peak suggestion has been outdated",
- "6 nodes with [vcpu: 2.0, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps]",
+ "5 nodes with [vcpu: 1.8, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps]",
suggestionOf(app1, cluster1, tester).get().resources().toString());
assertTrue(shouldSuggest(app1, cluster1, tester));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
index 6e50c934047..e94d1c1230e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
@@ -19,10 +19,10 @@ import java.time.Duration;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
/**
@@ -183,7 +183,6 @@ public class NodeTypeProvisioningTest {
List<Node> nodesToRetire = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).asList()
.subList(3, 3 + numNodesToRetire);
- String currentyRetiringHostname;
{
nodesToRetire.forEach(nodeToRetire ->
tester.nodeRepository().nodes().write(nodeToRetire.withWantToRetire(true, Agent.system, tester.clock().instant()), () -> {}));
@@ -198,14 +197,13 @@ public class NodeTypeProvisioningTest {
List<Node> nodesCurrentlyRetiring = nodes.stream()
.filter(node -> node.allocation().get().membership().retired())
.collect(Collectors.toList());
- assertEquals(1, nodesCurrentlyRetiring.size());
+ assertEquals(5, nodesCurrentlyRetiring.size());
- // The retiring node should be one of the nodes we marked for retirement
- currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
- assertEquals(1, nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count());
+ // Every node currently retiring must be one of the nodes we marked for retirement
+ assertTrue(Set.copyOf(nodesToRetire).containsAll(nodesCurrentlyRetiring));
}
- { // Redeploying while the node is still retiring has no effect
+ { // Redeploying while the nodes are still retiring has no effect
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
@@ -216,57 +214,29 @@ public class NodeTypeProvisioningTest {
List<Node> nodesCurrentlyRetiring = nodes.stream()
.filter(node -> node.allocation().get().membership().retired())
.collect(Collectors.toList());
- assertEquals(1, nodesCurrentlyRetiring.size());
-
- // The node that started retiring is still the only one retiring
- assertEquals(currentyRetiringHostname, nodesCurrentlyRetiring.get(0).hostname());
+ assertEquals(5, nodesCurrentlyRetiring.size());
}
{
+ // Let all retired nodes expire
tester.advanceTime(Duration.ofMinutes(11));
retiredExpirer.run();
List<HostSpec> hosts = deployProxies(application, tester);
- assertEquals(10, hosts.size());
+ assertEquals(6, hosts.size());
tester.activate(application, new HashSet<>(hosts));
- NodeList nodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy);
- assertEquals(10, nodes.size());
- // Verify the node we previously set to retire has finished retiring
- assertEquals(Node.State.dirty, tester.nodeRepository().nodes().node(currentyRetiringHostname)
- .orElseThrow(RuntimeException::new).state());
-
- // Verify that a node is currently retiring
- List<Node> nodesCurrentlyRetiring = nodes.stream()
- .filter(node -> node.allocation().get().membership().retired())
- .collect(Collectors.toList());
- assertEquals(1, nodesCurrentlyRetiring.size());
+ // None of the currently active proxy nodes should be marked with wantToRetire or as retired
+ long numRetiredActiveProxyNodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).stream()
+ .filter(node -> !node.status().wantToRetire())
+ .filter(node -> !node.allocation().get().membership().retired())
+ .count();
+ assertEquals(6, numRetiredActiveProxyNodes);
- // This node is different from the one that was retiring previously
- String newRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
- assertNotEquals(currentyRetiringHostname, newRetiringHostname);
- // ... but is one of the nodes that were put to wantToRetire earlier
- assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(newRetiringHostname)).count() == 1);
+ // All the nodes that were marked with wantToRetire earlier are now dirty
+ assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()),
+ tester.nodeRepository().nodes().list(Node.State.dirty).stream().map(Node::hostname).collect(Collectors.toSet()));
}
-
-
- for (int i = 0; i < 10; i++){
- tester.advanceTime(Duration.ofMinutes(11));
- retiredExpirer.run();
- List<HostSpec> hosts = deployProxies(application, tester);
- tester.activate(application, new HashSet<>(hosts));
- }
-
- // After a long time, all currently active proxy nodes are not marked with wantToRetire or as retired
- long numRetiredActiveProxyNodes = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.proxy).stream()
- .filter(node -> !node.status().wantToRetire())
- .filter(node -> !node.allocation().get().membership().retired())
- .count();
- assertEquals(11 - numNodesToRetire, numRetiredActiveProxyNodes);
-
- // All the nodes that were marked with wantToRetire earlier are now dirty
- assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()),
- tester.nodeRepository().nodes().list(Node.State.dirty).stream().map(Node::hostname).collect(Collectors.toSet()));
}
private List<HostSpec> deployProxies(ApplicationId application, ProvisioningTester tester) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
index 1e9a2d60837..65e07c46242 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json
@@ -66,7 +66,7 @@
},
"utilization" : {
"cpu" : 0.0,
- "idealCpu": 0.2,
+ "idealCpu": 0.275,
"memory" : 0.0,
"idealMemory": 0.7,
"disk" : 0.0,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
index 376b748ff8e..ecab55d19d4 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
@@ -42,7 +42,7 @@
},
"utilization" : {
"cpu" : 0.0,
- "idealCpu": 0.19047619047619047,
+ "idealCpu": 0.2664285714285714,
"memory" : 0.0,
"idealMemory": 0.7,
"disk" : 0.0,