summaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src/main/java/com')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java32
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java58
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java100
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java38
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java78
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java52
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java72
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java111
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java7
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ArchiveUris.java7
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java3
26 files changed, 421 insertions, 237 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
index 085b89d1253..1460ce70686 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
@@ -26,16 +26,23 @@ import java.util.Set;
*/
public class NodeRepoStats {
+ private final double totalCost;
+ private final double totalAllocatedCost;
private final Load load;
private final Load activeLoad;
private final List<ApplicationStats> applicationStats;
- private NodeRepoStats(Load load, Load activeLoad, List<ApplicationStats> applicationStats) {
+ private NodeRepoStats(double totalCost, double totalAllocatedCost, Load load, Load activeLoad, List<ApplicationStats> applicationStats) {
+ this.totalCost = totalCost;
+ this.totalAllocatedCost = totalAllocatedCost;
this.load = load;
this.activeLoad = activeLoad;
this.applicationStats = List.copyOf(applicationStats);
}
+ public double totalCost() { return totalCost; }
+ public double totalAllocatedCost() { return totalAllocatedCost; }
+
/**
* Returns the current average work-extracting utilization in this node repo over all nodes.
* Capacity not allocated to active nodes are taken to have 0 utilization as it provides no useful work.
@@ -50,11 +57,15 @@ public class NodeRepoStats {
public static NodeRepoStats computeOver(NodeRepository nodeRepository) {
NodeList allNodes = nodeRepository.nodes().list();
- List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of());
+ double totalCost = allNodes.hosts().stream().mapToDouble(host -> host.resources().cost()).sum();
+ double totalAllocatedCost = allNodes.not().hosts().stream()
+ .filter(node -> node.allocation().isPresent())
+ .mapToDouble(node -> node.resources().cost()).sum();
+ List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of());
Pair<Load, Load> load = computeLoad(allNodes, allNodeTimeseries);
List<ApplicationStats> applicationStats = computeApplicationStats(allNodes, allNodeTimeseries);
- return new NodeRepoStats(load.getFirst(), load.getSecond(), applicationStats);
+ return new NodeRepoStats(totalCost, totalAllocatedCost, load.getFirst(), load.getSecond(), applicationStats);
}
private static Pair<Load, Load> computeLoad(NodeList allNodes, List<NodeTimeseries> allNodeTimeseries) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index 88a4b492a0b..4a1545cc66c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -64,6 +64,33 @@ public class AllocatableClusterResources {
this.fulfilment = fulfilment(realResources, idealResources);
}
+ private AllocatableClusterResources(int nodes,
+ int groups,
+ NodeResources realResources,
+ NodeResources advertisedResources,
+ ClusterSpec clusterSpec,
+ double fulfilment) {
+ this.nodes = nodes;
+ this.groups = groups;
+ this.realResources = realResources;
+ this.advertisedResources = advertisedResources;
+ this.clusterSpec = clusterSpec;
+ this.fulfilment = fulfilment;
+ }
+
+ /** Returns this with the redundant node or group removed from counts. */
+ public AllocatableClusterResources withoutRedundancy() {
+ int groupSize = nodes / groups;
+ int nodesAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+ int groupsAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
+ return new AllocatableClusterResources(nodesAdjustedForRedundancy,
+ groupsAdjustedForRedundancy,
+ realResources,
+ advertisedResources,
+ clusterSpec,
+ fulfilment);
+ }
+
/**
* Returns the resources which will actually be available per node in this cluster with this allocation.
* These should be used for reasoning about allocation to meet measured demand.
@@ -83,11 +110,6 @@ public class AllocatableClusterResources {
public int nodes() { return nodes; }
public int groups() { return groups; }
- public int groupSize() {
- // ceil: If the division does not produce a whole number we assume some node is missing
- return (int)Math.ceil((double)nodes / groups);
- }
-
public ClusterSpec clusterSpec() { return clusterSpec; }
public double cost() { return nodes * advertisedResources.cost(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 41fa9499353..2befd69f893 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -20,11 +19,6 @@ public class AllocationOptimizer {
private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2
private static final int maximumNodes = 150;
- // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
- // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
- // TODO: Measure this, and only take it into account with queries
- private static final double fixedCpuCostFraction = 0.1;
-
private final NodeRepository nodeRepository;
public AllocationOptimizer(NodeRepository nodeRepository) {
@@ -32,13 +26,13 @@ public class AllocationOptimizer {
}
/**
- * Searches the space of possible allocations given a target
+ * Searches the space of possible allocations given a target relative load
* and (optionally) cluster limits and returns the best alternative.
*
* @return the best allocation, if there are any possible legal allocations, fulfilling the target
* fully or partially, within the limits
*/
- public Optional<AllocatableClusterResources> findBestAllocation(ResourceTarget target,
+ public Optional<AllocatableClusterResources> findBestAllocation(Load targetLoad,
AllocatableClusterResources current,
ClusterModel clusterModel,
Limits limits) {
@@ -53,18 +47,11 @@ public class AllocationOptimizer {
for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) {
for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) {
if (nodes % groups != 0) continue;
- int groupSize = nodes / groups;
-
- // Adjust for redundancy: Node in group if groups = 1, an extra group if multiple groups
- // TODO: Make the best choice based on size and redundancy setting instead
- int nodesAdjustedForRedundancy = target.adjustForRedundancy() ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
- int groupsAdjustedForRedundancy = target.adjustForRedundancy() ? (groups == 1 ? 1 : groups - 1) : groups;
ClusterResources next = new ClusterResources(nodes,
groups,
- nodeResourcesWith(nodesAdjustedForRedundancy,
- groupsAdjustedForRedundancy,
- limits, target, current, clusterModel));
+ nodeResourcesWith(nodes, groups,
+ limits, targetLoad, current, clusterModel));
var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits,
hosts, nodeRepository);
if (allocatableResources.isEmpty()) continue;
@@ -77,42 +64,25 @@ public class AllocationOptimizer {
/**
* For the observed load this instance is initialized with, returns the resources needed per node to be at
- * ideal load given a target node count
+ * the target relative load, given a target node and group count.
*/
private NodeResources nodeResourcesWith(int nodes,
int groups,
Limits limits,
- ResourceTarget target,
+ Load targetLoad,
AllocatableClusterResources current,
ClusterModel clusterModel) {
- double cpu, memory, disk;
- int groupSize = nodes / groups;
-
- if (current.clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
- // Cpu: Query cpu scales with cluster size, write cpu scales with group size
- // Memory and disk: Scales with group size
-
- // The fixed cost portion of cpu does not scale with changes to the node count
- double queryCpuPerGroup = fixedCpuCostFraction * target.resources().vcpu() +
- (1 - fixedCpuCostFraction) * target.resources().vcpu() * current.groupSize() / groupSize;
- double queryCpu = queryCpuPerGroup * current.groups() / groups;
- double writeCpu = target.resources().vcpu() * current.groupSize() / groupSize;
- cpu = clusterModel.queryCpuFraction() * queryCpu + (1 - clusterModel.queryCpuFraction()) * writeCpu;
- memory = target.resources().memoryGb() * current.groupSize() / groupSize;
- disk = target.resources().diskGb() * current.groupSize() / groupSize;
- }
- else {
- cpu = target.resources().vcpu() * current.nodes() / nodes;
- memory = target.resources().memoryGb();
- disk = target.resources().diskGb();
- }
+ var scaled = targetLoad // redundancy aware target relative to current load
+ .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
+ .divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment
+ .scaled(current.realResources().nodeResources());
// Combine the scaled resource values computed here
// with the currently configured non-scaled values, given in the limits, if any
- NodeResources nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
- ? current.advertisedResources().nodeResources()
- : limits.min().nodeResources(); // min=max for non-scaled
- return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
+ var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
+ ? current.advertisedResources().nodeResources()
+ : limits.min().nodeResources(); // min=max for non-scaled
+ return nonScaled.withVcpu(scaled.vcpu()).withMemoryGb(scaled.memoryGb()).withDiskGb(scaled.diskGb());
}
/** Returns a copy of the given limits where the minimum nodes are at least the given value when allowed */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index c2e66d39861..7a02fa9eb7e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -26,7 +26,7 @@ public class Autoscaler {
/** What cost difference is worth a reallocation? */
private static final double costDifferenceWorthReallocation = 0.1;
/** What resource difference is worth a reallocation? */
- private static final double resourceDifferenceWorthReallocation = 0.1;
+ private static final double resourceDifferenceWorthReallocation = 0.03;
private final NodeRepository nodeRepository;
private final AllocationOptimizer allocationOptimizer;
@@ -61,8 +61,8 @@ public class Autoscaler {
private Advice autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) {
ClusterModel clusterModel = new ClusterModel(application,
- cluster,
clusterNodes.clusterSpec(),
+ cluster,
clusterNodes,
nodeRepository.metricsDb(),
nodeRepository.clock());
@@ -89,10 +89,8 @@ public class Autoscaler {
" nodes, but require from " + clusterNodes.size());
var currentAllocation = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository);
- var target = ResourceTarget.idealLoad(clusterModel, currentAllocation);
-
Optional<AllocatableClusterResources> bestAllocation =
- allocationOptimizer.findBestAllocation(target, currentAllocation, clusterModel, limits);
+ allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits);
if (bestAllocation.isEmpty())
return Advice.dontScale(Status.insufficient, "No allocations are possible within configured limits");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index b7a5c1e7fe7..ae18e7ffb91 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -27,14 +27,17 @@ public class ClusterModel {
/** Containers typically use more cpu right after generation change, so discard those metrics */
public static final Duration warmupDuration = Duration.ofMinutes(5);
- private static final Duration currentLoadDuration = Duration.ofMinutes(5);
-
static final double idealQueryCpuLoad = 0.8;
static final double idealWriteCpuLoad = 0.95;
static final double idealMemoryLoad = 0.65;
static final double idealContainerDiskLoad = 0.95;
static final double idealContentDiskLoad = 0.6;
+ // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
+ // proportional to document count. We must account for this when comparing configurations with more or fewer nodes.
+ // TODO: Measure this, and only take it into account with queries
+ private static final double fixedCpuCostFraction = 0.1;
+
private final Application application;
private final ClusterSpec clusterSpec;
private final Cluster cluster;
@@ -50,8 +53,8 @@ public class ClusterModel {
private Double maxQueryGrowthRate = null;
public ClusterModel(Application application,
- Cluster cluster,
ClusterSpec clusterSpec,
+ Cluster cluster,
NodeList clusterNodes,
MetricsDb metricsDb,
Clock clock) {
@@ -76,7 +79,7 @@ public class ClusterModel {
this.application = application;
this.clusterSpec = clusterSpec;
this.cluster = cluster;
- this.nodes = null;
+ this.nodes = NodeList.of();
this.clock = clock;
this.scalingDuration = scalingDuration;
@@ -88,6 +91,20 @@ public class ClusterModel {
public ClusterSpec clusterSpec() { return clusterSpec; }
public Cluster cluster() { return cluster; }
+ /** Returns the relative load adjustment that should be made to this cluster given available measurements. */
+ public Load loadAdjustment() {
+ if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change
+ /*
+ // Should we scale up?
+ Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad());
+ if (relativePeak.any(v -> v > 1))
+ return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale
+
+ // Should we scale down?
+ */
+ return averageLoad().divide(idealLoad());
+ }
+
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
@@ -110,14 +127,78 @@ public class ClusterModel {
return queryFractionOfMax = clusterTimeseries().queryFractionOfMax(scalingDuration(), clock);
}
- /** Returns average load during the last {@link #currentLoadDuration} */
- public Load currentLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(currentLoadDuration)); }
+ /** Returns average of the last load reading from each node. */
+ public Load currentLoad() { return nodeTimeseries().currentLoad(); }
/** Returns average load during the last {@link #scalingDuration()} */
public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); }
+ /** The number of nodes this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int nodeCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().count();
+ return cluster.minResources().nodes();
+ }
+
+ /** The number of groups this cluster has, or will have if not deployed yet. */
+ // TODO: Make this the deployed, not current count
+ public int groupCount() {
+ if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count();
+ return cluster.minResources().groups();
+ }
+
+ public int groupSize() {
+ // ceil: If the division does not produce a whole number we assume some node is missing
+ return (int)Math.ceil((double)nodeCount() / groupCount());
+ }
+
+ /** Returns the relative load adjustment accounting for redundancy in this. */
+ public Load redundancyAdjustment() {
+ return loadWith(nodeCount(), groupCount());
+ }
+
+ /**
+ * Returns the relative load adjustment accounting for redundancy given these nodes+groups
+ * relative to the nodes+groups in this.
+ */
+ public Load loadWith(int trueNodes, int trueGroups) {
+ int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups);
+ int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups);
+ if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content
+ int groupSize = nodes / groups;
+
+ // Cpu: Query cpu scales with cluster size, write cpu scales with group size
+ // Memory and disk: Scales with group size
+
+ // The fixed cost portion of cpu does not scale with changes to the node count
+ double queryCpuPerGroup = fixedCpuCostFraction + (1 - fixedCpuCostFraction) * groupSize() / groupSize;
+
+ double queryCpu = queryCpuPerGroup * groupCount() / groups;
+ double writeCpu = (double)groupSize() / groupSize;
+ return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu,
+ (double)groupSize() / groupSize,
+ (double)groupSize() / groupSize);
+ }
+ else {
+ return new Load((double)nodeCount() / nodes, 1, 1);
+ }
+ }
+
+ /**
+ * Returns the ideal load across the nodes of this such that each node will be at ideal load
+ * if one of the nodes goes down.
+ */
public Load idealLoad() {
- return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad());
+ return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()).divide(redundancyAdjustment());
+ }
+
+ public int nodesAdjustedForRedundancy(int nodes, int groups) {
+ int groupSize = (int)Math.ceil((double)nodes / groups);
+ return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes;
+ }
+
+ public int groupsAdjustedForRedundancy(int nodes, int groups) {
+ return nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups;
}
/** Ideal cpu load must take the application traffic fraction into account */
@@ -171,7 +252,6 @@ public class ClusterModel {
// Assume we have missed timely recording completion if it is longer than 4 days
totalDuration = totalDuration.plus(maximum(Duration.ofDays(4), event.duration().get()));
}
-
if (completedEventCount == 0) { // Use defaults
if (clusterSpec.isStateful()) return Duration.ofHours(12);
return Duration.ofMinutes(10);
@@ -212,13 +292,13 @@ public class ClusterModel {
* as QuestDb is known to temporarily fail during reading of data.
*/
public static Optional<ClusterModel> create(Application application,
- Cluster cluster,
ClusterSpec clusterSpec,
+ Cluster cluster,
NodeList clusterNodes,
MetricsDb metricsDb,
Clock clock) {
try {
- return Optional.of(new ClusterModel(application, cluster, clusterSpec, clusterNodes, metricsDb, clock));
+ return Optional.of(new ClusterModel(application, clusterSpec, cluster, clusterNodes, metricsDb, clock));
}
catch (Exception e) {
log.log(Level.WARNING, "Failed creating a cluster model for " + application + " " + cluster, e);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
index 5ad4ef2e263..ab5be045dd4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java
@@ -7,6 +7,8 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
+import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@@ -48,6 +50,7 @@ public class ClusterNodesTimeseries {
/** Returns the average number of measurements per node */
public int measurementsPerNode() {
+ if (clusterNodes.size() == 0) return 0;
int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum();
return measurementCount / clusterNodes.size();
}
@@ -69,6 +72,41 @@ public class ClusterNodesTimeseries {
return total.divide(count);
}
+ /** Returns average of the latest load reading from each node */
+ public Load currentLoad() {
+ Load total = Load.zero();
+ int count = 0;
+ for (var nodeTimeseries : timeseries) {
+ Optional<NodeMetricSnapshot> last = nodeTimeseries.last();
+ if (last.isEmpty()) continue;
+
+ total = total.add(last.get().load());
+ count++;
+ }
+ return total.divide(count);
+ }
+
+ /**
+ * Returns the "peak load" in this: Which is for each load dimension,
+ * the average of the highest reading for that dimension on each node.
+ */
+ public Load peakLoad() {
+ return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk));
+ }
+
+ private double peakLoad(Load.Dimension dimension) {
+ double total = 0;
+ int count = 0;
+ for (var nodeTimeseries : timeseries) {
+ OptionalDouble value = nodeTimeseries.peak(dimension);
+ if (value.isEmpty()) continue;
+ total += value.getAsDouble();
+ count++;
+ }
+ if (count == 0) return 0;
+ return total / count;
+ }
+
private static List<NodeTimeseries> keep(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) {
return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.keep(filter)).collect(Collectors.toList());
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
index a52b048a9e0..88c7e70cd35 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java
@@ -3,6 +3,12 @@ package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.NodeResources;
+import java.util.Objects;
+import java.util.function.DoubleBinaryOperator;
+import java.util.function.DoubleFunction;
+import java.util.function.DoubleUnaryOperator;
+import java.util.function.Predicate;
+
/**
* The load of a node or system, measured as fractions of max (1.0) in three dimensions.
*
@@ -10,6 +16,8 @@ import com.yahoo.config.provision.NodeResources;
*/
public class Load {
+ public enum Dimension { cpu, memory, disk }
+
private final double cpu, memory, disk;
public Load(double cpu, double memory, double disk) {
@@ -23,27 +31,51 @@ public class Load {
public double disk() { return disk; }
public Load add(Load other) {
- return new Load(cpu + other.cpu(), memory + other.memory(), disk + other.disk());
+ return join(other, (a, b) -> a + b);
}
public Load multiply(NodeResources resources) {
return new Load(cpu * resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb());
}
-
public Load multiply(double factor) {
- return new Load(cpu * factor, memory * factor, disk * factor);
+ return map(v -> v * factor);
+ }
+ public Load multiply(Load other) {
+ return join(other, (a, b) -> a * b);
}
+ public Load divide(Load divisor) {
+ return join(divisor, (a, b) -> divide(a, b));
+ }
+ public Load divide(double divisor) {
+ return map(v -> divide(v, divisor));
+ }
public Load divide(NodeResources resources) {
return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb()));
}
- public Load divide(Load divisor) {
- return new Load(divide(cpu, divisor.cpu()), divide(memory, divisor.memory()), divide(disk, divisor.disk()));
+ /** Returns the load having the max value of this and the given load in each dimension. */
+ public Load max(Load other) {
+ return join(other, (a, b) -> Math.max(a, b));
}
- public Load divide(double divisor) {
- return new Load(divide(cpu, divisor), divide(memory, divisor), divide(disk, divisor));
+ /** Returns the load where the given function is applied to each dimension of this. */
+ public Load map(DoubleUnaryOperator f) {
+ return new Load(f.applyAsDouble(cpu),
+ f.applyAsDouble(memory),
+ f.applyAsDouble(disk));
+ }
+
+ /** Returns the load where the given function is applied to each dimension of this and the given load. */
+ public Load join(Load other, DoubleBinaryOperator f) {
+ return new Load(f.applyAsDouble(this.cpu(), other.cpu()),
+ f.applyAsDouble(this.memory(), other.memory()),
+ f.applyAsDouble(this.disk(), other.disk()));
+ }
+
+ /** Returns true if any dimension matches the predicate. */
+ public boolean any(Predicate<Double> test) {
+ return test.test(cpu) || test.test(memory) || test.test(disk);
}
public NodeResources scaled(NodeResources resources) {
@@ -52,6 +84,14 @@ public class Load {
.withDiskGb(disk * resources.diskGb());
}
+ public double get(Dimension dimension) {
+ return switch (dimension) {
+ case cpu -> cpu();
+ case memory -> memory();
+ case disk -> disk();
+ };
+ }
+
private double requireNormalized(double value, String name) {
if (Double.isNaN(value))
throw new IllegalArgumentException(name + " must be a number but is NaN");
@@ -60,17 +100,31 @@ public class Load {
return value;
}
+ private static double divide(double a, double b) {
+ if (a == 0 && b == 0) return 0;
+ return a / b;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) return true;
+ if ( ! (o instanceof Load other)) return false;
+ if (other.cpu() != this.cpu()) return false;
+ if (other.memory() != this.memory()) return false;
+ if (other.disk() != this.disk()) return false;
+ return true;
+ }
+
+ @Override
+ public int hashCode() { return Objects.hash(cpu, memory, disk); }
+
@Override
public String toString() {
return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk";
}
public static Load zero() { return new Load(0, 0, 0); }
-
- private static double divide(double a, double b) {
- if (a == 0 && b == 0) return 0;
- return a / b;
- }
+ public static Load one() { return new Load(1, 1, 1); }
public static Load byDividing(NodeResources a, NodeResources b) {
return new Load(divide(a.vcpu(), b.vcpu()), divide(a.memoryGb(), b.memoryGb()), divide(a.diskGb(), b.diskGb()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
index 4a5f8972e11..500dbf0f66f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java
@@ -6,6 +6,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@@ -40,6 +41,10 @@ public class NodeTimeseries {
return Optional.of(snapshots.get(snapshots.size() - 1));
}
+ public OptionalDouble peak(Load.Dimension dimension) {
+ return snapshots.stream().mapToDouble(snapshot -> snapshot.load().get(dimension)).max();
+ }
+
public List<NodeMetricSnapshot> asList() { return snapshots; }
public String hostname() { return hostname; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
deleted file mode 100644
index 72836baaf5b..00000000000
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.provision.autoscale;
-
-import com.yahoo.config.provision.NodeResources;
-import com.yahoo.vespa.hosted.provision.applications.Application;
-
-import java.time.Clock;
-import java.time.Duration;
-import java.util.OptionalDouble;
-
-/**
- * A resource target to hit for the allocation optimizer.
- * The target is measured in cpu, memory and disk per node in the allocation given by current.
- *
- * @author bratseth
- */
-public class ResourceTarget {
-
- private final boolean adjustForRedundancy;
-
- /** The target real resources per node, assuming the node assignment where this was decided */
- private final NodeResources resources;
-
- private ResourceTarget(NodeResources resources, boolean adjustForRedundancy) {
- this.resources = resources;
- this.adjustForRedundancy = adjustForRedundancy;
- }
-
- /** Are the target resources given by this including redundancy or not */
- public boolean adjustForRedundancy() { return adjustForRedundancy; }
-
- /** Returns the target resources per node in terms of the current allocation */
- public NodeResources resources() { return resources; }
-
- @Override
- public String toString() {
- return "target " + resources + (adjustForRedundancy ? "(with redundancy adjustment) " : "");
- }
-
- /** Create a target of achieving ideal load given a current load */
- public static ResourceTarget idealLoad(ClusterModel clusterModel,
- AllocatableClusterResources current) {
- var loadAdjustment = clusterModel.averageLoad().divide(clusterModel.idealLoad());
- return new ResourceTarget(loadAdjustment.scaled(current.realResources().nodeResources()), true);
- }
-
- /** Crete a target of preserving a current allocation */
- public static ResourceTarget preserve(AllocatableClusterResources current) {
- return new ResourceTarget(current.realResources().nodeResources(), false);
- }
-
-}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 9c6eb2199f5..a9e7ded66e6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -12,7 +12,6 @@ import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.lang.MutableInteger;
-import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.JacksonFlag;
import com.yahoo.vespa.flags.ListFlag;
@@ -77,16 +76,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
@Override
protected double maintain() {
- try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
- NodeList nodes = nodeRepository().nodes().list();
- resumeProvisioning(nodes, lock);
- convergeToCapacity(nodes);
- }
+ NodeList nodes = nodeRepository().nodes().list();
+ resumeProvisioning(nodes);
+ convergeToCapacity(nodes);
return 1.0;
}
/** Resume provisioning of already provisioned hosts and their children */
- private void resumeProvisioning(NodeList nodes, Mutex lock) {
+ private void resumeProvisioning(NodeList nodes) {
Map<String, Set<Node>> nodesByProvisionedParentHostname =
nodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller)
.asList()
@@ -97,9 +94,11 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost).forEach(host -> {
Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of());
try {
- List<Node> updatedNodes = hostProvisioner.provision(host, children);
- verifyDns(updatedNodes);
- nodeRepository().nodes().write(updatedNodes, lock);
+ try (var lock = nodeRepository().nodes().lockUnallocated()) {
+ List<Node> updatedNodes = hostProvisioner.provision(host, children);
+ verifyDns(updatedNodes);
+ nodeRepository().nodes().write(updatedNodes, lock);
+ }
} catch (IllegalArgumentException | IllegalStateException e) {
log.log(Level.INFO, "Could not provision " + host.hostname() + " with " + children.size() + " children, will retry in " +
interval() + ": " + Exceptions.toMessageString(e));
@@ -108,7 +107,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
" children, failing out the host recursively", e);
// Fail out as operator to force a quick redeployment
nodeRepository().nodes().failOrMarkRecursively(
- host.hostname(), Agent.operator, "Failed by HostProvisioner due to provisioning failure");
+ host.hostname(), Agent.DynamicProvisioningMaintainer, "Failed by HostProvisioner due to provisioning failure");
} catch (RuntimeException e) {
if (e.getCause() instanceof NameNotFoundException)
log.log(Level.INFO, "Could not provision " + host.hostname() + ", will retry in " + interval() + ": " + Exceptions.toMessageString(e));
@@ -187,29 +186,38 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
.collect(Collectors.toList());
}
- private List<Node> candidatesForRemoval(List<Node> nodes) {
- Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream()
- .filter(node -> {
- switch (node.type()) {
- case host:
- // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here
- return node.state() != Node.State.parked || node.status().wantToDeprovision();
- case confighost:
- case controllerhost:
- return node.state() == Node.State.parked && node.status().wantToDeprovision();
- default:
- return false;
- }
- })
- .collect(Collectors.toMap(Node::hostname, Function.identity())));
+ /**
+  * Returns the hosts among the given nodes which can be removed: Those that are removable on their own
+  * merits (see canRemoveHost) and which have no child that cannot be removed (see canRemoveNode).
+  */
+ private static List<Node> candidatesForRemoval(List<Node> nodes) {
+     // First pass: Every host which is removable when considered in isolation, keyed by hostname
+     Map<String, Node> candidates = new HashMap<>();
+     nodes.stream()
+          .filter(DynamicProvisioningMaintainer::canRemoveHost)
+          .forEach(host -> candidates.put(host.hostname(), host));
+
+     // Second pass: A host is kept if any of its children cannot be removed
+     for (var child : nodes) {
+         var parentHostname = child.parentHostname();
+         if (parentHostname.isEmpty() || canRemoveNode(child)) continue;
+         candidates.remove(parentHostname.get());
+     }
+     return List.copyOf(candidates.values());
+ }
- nodes.stream()
- .filter(node -> node.allocation().isPresent())
- .flatMap(node -> node.parentHostname().stream())
- .distinct()
- .forEach(hostsByHostname::remove);
+ /**
+  * Returns whether the given host is itself in a condition where it may be removed.
+  * The children of the host are not considered here. Nodes which are not of a host type are never removable.
+  */
+ private static boolean canRemoveHost(Node host) {
+     return switch (host.type()) {
+         case confighost, controllerhost -> canDeprovision(host);
+         // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here
+         case host -> host.state() != Node.State.parked || host.status().wantToDeprovision();
+         default -> false;
+     };
+ }
+
+ /**
+  * Returns whether the given child node allows its parent host to be removed:
+  * It is either unallocated, or it can be deprovisioned.
+  *
+  * @throws IllegalArgumentException if the given node is of a host type
+  */
+ private static boolean canRemoveNode(Node node) {
+     if (node.type().isHost())
+         throw new IllegalArgumentException("Node " + node + " is not a child");
+     if (node.allocation().isEmpty()) return true;
+     return canDeprovision(node);
+ }
- return List.copyOf(hostsByHostname.values());
+ /** Returns whether the given node is marked as wanting to deprovision and is in state parked or failed */
+ private static boolean canDeprovision(Node node) {
+     if ( ! node.status().wantToDeprovision()) return false;
+     var state = node.state();
+     return state == Node.State.parked || state == Node.State.failed;
}
private Map<String, Node> findSharedHosts(NodeList nodeList) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 62557b275c8..67c1c7359f7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -198,11 +198,6 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
metric.set("wantToDeprovision", node.status().wantToDeprovision() ? 1 : 0, context);
metric.set("failReport", NodeFailer.reasonsToFailHost(node).isEmpty() ? 0 : 1, context);
- if (node.type().isHost()) {
- metric.set("wantToEncrypt", node.reports().getReport("wantToEncrypt").isPresent() ? 1 : 0, context);
- metric.set("diskEncrypted", node.reports().getReport("diskEncrypted").isPresent() ? 1 : 0, context);
- }
-
HostName hostname = new HostName(node.hostname());
serviceModel.getApplication(hostname)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 3c5b20da4d0..3e7abe8f053 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
@@ -19,9 +20,11 @@ import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.time.Instant;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.logging.Level;
@@ -106,26 +109,6 @@ public class NodeFailer extends NodeRepositoryMaintainer {
failActive(failing);
}
- // Active hosts
- NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
- for (Node host : activeNodes.hosts().failing()) {
- if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
- Optional<NodeMutex> locked = Optional.empty();
- try {
- attempts++;
- locked = nodeRepository().nodes().lockAndGet(host);
- if (locked.isEmpty()) continue;
- nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
- "Host should be failed and have no tenant nodes");
- }
- catch (Exception e) {
- failures++;
- }
- finally {
- locked.ifPresent(NodeMutex::close);
- }
- }
-
int throttlingActive = Math.min(1, throttledHostFailures + throttledNodeFailures);
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
@@ -153,6 +136,9 @@ public class NodeFailer extends NodeRepositoryMaintainer {
Set<FailingNode> failingNodes = new HashSet<>();
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
+ for (Node host : activeNodes.hosts().failing())
+ failingNodes.add(new FailingNode(host, "Host should be failed and have no tenant nodes"));
+
for (Node node : activeNodes) {
Instant graceTimeStart = clock().instant().minus(nodeRepository().nodes().suspended(node) ? suspendedDownTimeLimit : downTimeLimit);
if (node.isDown() && node.history().hasEventBefore(History.Event.Type.down, graceTimeStart) && !applicationSuspended(node)) {
@@ -241,42 +227,61 @@ public class NodeFailer extends NodeRepositoryMaintainer {
deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(30));
if (deployment.isEmpty()) return false;
- try (Mutex lock = nodeRepository().nodes().lock(failing.node().allocation().get().owner())) {
- // If the active node that we are trying to fail is of type host, we need to successfully fail all
- // the children nodes running on it before we fail the host
- boolean allTenantNodesFailedOutSuccessfully = true;
+ // If the active node that we are trying to fail is of type host, we need to successfully fail all
+ // the children nodes running on it before we fail the host. Failing a child node in a dynamically
+ // provisioned zone may require provisioning new hosts that require the host application lock to be held,
+ // so we must release ours before failing the children.
+ List<FailingNode> activeChildrenToFail = new ArrayList<>();
+ try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) {
+ // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
+ if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner)))
+ return false;
+ if (lock.node().state() == Node.State.failed)
+ return true;
+ if (!Objects.equals(failing.node().state(), lock.node().state()))
+ return false;
+ failing = new FailingNode(lock.node(), failing.reason);
+
String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) {
if (failingTenantNode.state() == Node.State.active) {
- allTenantNodesFailedOutSuccessfully &= failActive(new FailingNode(failingTenantNode, reasonForChildFailure));
- } else {
+ activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure));
+ } else if (failingTenantNode.state() != Node.State.failed) {
nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure);
}
}
- if (! allTenantNodesFailedOutSuccessfully) return false;
- wantToFail(failing.node(), true, lock);
- try {
- deployment.get().activate();
- return true;
- } catch (TransientException e) {
- log.log(Level.INFO, "Failed to redeploy " + failing.node().allocation().get().owner() +
- " with a transient error, will be retried by application maintainer: " +
- Exceptions.toMessageString(e));
- return true;
- } catch (RuntimeException e) {
- // Reset want to fail: We'll retry failing unless it heals in the meantime
- nodeRepository().nodes().node(failing.node().hostname())
- .ifPresent(n -> wantToFail(n, false, lock));
- log.log(Level.WARNING, "Could not fail " + failing.node() + " for " + failing.node().allocation().get().owner() +
- " for " + failing.reason() + ": " + Exceptions.toMessageString(e));
- return false;
+ if (activeChildrenToFail.isEmpty()) {
+ wantToFail(failing.node(), true, lock);
+ try {
+ deployment.get().activate();
+ return true;
+ } catch (TransientException e) {
+ log.log(Level.INFO, "Failed to redeploy " + failing.node().allocation().get().owner() +
+ " with a transient error, will be retried by application maintainer: " +
+ Exceptions.toMessageString(e));
+ return true;
+ } catch (RuntimeException e) {
+ // Reset want to fail: We'll retry failing unless it heals in the meantime
+ nodeRepository().nodes().node(failing.node().hostname())
+ .ifPresent(n -> wantToFail(n, false, lock));
+ log.log(Level.WARNING, "Could not fail " + failing.node() + " for " + failing.node().allocation().get().owner() +
+ " for " + failing.reason() + ": " + Exceptions.toMessageString(e));
+ return false;
+ }
}
}
+
+ // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
+ // so failActive() may take a long time to complete, but the remaining children should be fast.
+ activeChildrenToFail.forEach(this::failActive);
+
+ return false;
}
+ /**
+  * Sets the wantToFail flag of the given node to the given value and writes the node under the given lock.
+  * Nothing is written if the node already has the requested value.
+  */
private void wantToFail(Node node, boolean wantToFail, Mutex lock) {
- nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, clock().instant()), lock);
+ // Write only when the flag actually changes. Testing just for the flag being unset would skip the write
+ // when a caller resets wantToFail to false after a failed redeployment, leaving the flag set forever
+ if (node.status().wantToFail() != wantToFail)
+     nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, clock().instant()), lock);
}
/** Returns true if node failing should be throttled */
@@ -284,16 +289,24 @@ public class NodeFailer extends NodeRepositoryMaintainer {
if (throttlePolicy == ThrottlePolicy.disabled) return false;
Instant startOfThrottleWindow = clock().instant().minus(throttlePolicy.throttleWindow);
NodeList allNodes = nodeRepository().nodes().list();
- NodeList recentlyFailedNodes = allNodes.state(Node.State.failed)
- .matching(n -> n.history().hasEventAfter(History.Event.Type.failed,
- startOfThrottleWindow));
+ NodeList recentlyFailedNodes = allNodes
+ .matching(n -> n.status().wantToFail() ||
+ (n.state() == Node.State.failed &&
+ n.history().hasEventAfter(History.Event.Type.failed, startOfThrottleWindow)));
// Allow failing any node within policy
if (recentlyFailedNodes.size() < throttlePolicy.allowedToFailOf(allNodes.size())) return false;
// Always allow failing a minimum number of hosts
- if (node.parentHostname().isEmpty() &&
- recentlyFailedNodes.parents().size() < throttlePolicy.minimumAllowedToFail) return false;
+ if (node.parentHostname().isEmpty()) {
+ Set<String> parentsOfRecentlyFailedNodes = recentlyFailedNodes.stream()
+ .map(n -> n.parentHostname().orElse(n.hostname()))
+ .collect(Collectors.toSet());
+ long potentiallyFailed = parentsOfRecentlyFailedNodes.contains(node.hostname()) ?
+ parentsOfRecentlyFailedNodes.size() :
+ parentsOfRecentlyFailedNodes.size() + 1;
+ if (potentiallyFailed <= throttlePolicy.minimumAllowedToFail) return false;
+ }
// Always allow failing children of a failed host
if (recentlyFailedNodes.parentOf(node).isPresent()) return false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index f32fd225427..aa1abb18d8c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -124,14 +124,14 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final NodeFailer.ThrottlePolicy throttlePolicy;
DefaultTimes(Zone zone, Deployer deployer) {
- autoscalingInterval = Duration.ofMinutes(15);
+ autoscalingInterval = Duration.ofMinutes(5);
dynamicProvisionerInterval = Duration.ofMinutes(3);
failedExpirerInterval = Duration.ofMinutes(10);
failGrace = Duration.ofMinutes(30);
infrastructureProvisionInterval = Duration.ofMinutes(3);
loadBalancerExpirerInterval = Duration.ofMinutes(5);
metricsInterval = Duration.ofMinutes(1);
- nodeFailerInterval = Duration.ofMinutes(15);
+ nodeFailerInterval = Duration.ofMinutes(9);
nodeFailureStatusUpdateInterval = Duration.ofMinutes(2);
nodeMetricsCollectionInterval = Duration.ofMinutes(1);
expeditedChangeRedeployInterval = Duration.ofMinutes(3);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
index ac804f99cd3..c2d4506a28c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.node;
import com.google.common.collect.ImmutableMap;
import com.yahoo.vespa.hosted.provision.Node;
+import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
@@ -50,6 +51,12 @@ public class History {
return builder.build();
}
+ /**
+  * Returns the age of this node as best as we can determine: The time since the first event registered for it.
+  * If no events are registered the age is zero.
+  *
+  * @param now the instant to measure the age at
+  * @return the duration from the earliest registered event until now
+  */
+ public Duration age(Instant now) {
+     // Only the earliest event matters, so take the minimum directly rather than sorting all event times
+     Instant oldestEventTime = events.values().stream()
+                                     .map(Event::at)
+                                     .min(Instant::compareTo)
+                                     .orElse(now);
+     return Duration.between(oldestEventTime, now);
+ }
+
/** Returns the last event of given type, if it is present in this history */
public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index d750a3ef737..41a23ac21ff 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -212,12 +212,12 @@ public class Nodes {
return setReady(List.of(nodeToReady), agent, reason).get(0);
}
- /** Reserve nodes. This method does <b>not</b> lock the node repository */
+ /** Reserve nodes. This method does <b>not</b> lock the node repository. */
public List<Node> reserve(List<Node> nodes) {
return db.writeTo(Node.State.reserved, nodes, Agent.application, Optional.empty());
}
- /** Activate nodes. This method does <b>not</b> lock the node repository */
+ /** Activate nodes. This method does <b>not</b> lock the node repository. */
public List<Node> activate(List<Node> nodes, NestedTransaction transaction) {
return db.writeTo(Node.State.active, nodes, Agent.application, Optional.empty(), transaction);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java
index 8578e3eb5ec..2b790ff7392 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java
@@ -30,4 +30,5 @@ public class NodeListFilter {
public static Predicate<Node> from(List<Node> nodes) {
return makePredicate(nodes);
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
index 30fd2713017..4178d4a6328 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
+import java.time.Instant;
import java.util.Objects;
import java.util.Optional;
import java.util.logging.Logger;
@@ -39,8 +40,10 @@ public class DelegatingOsUpgrader implements OsUpgrader {
public void upgradeTo(OsVersionTarget target) {
NodeList activeNodes = nodeRepository.nodes().list(Node.State.active).nodeType(target.nodeType());
int numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(target.version()).size());
+ Instant now = nodeRepository.clock().instant();
NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version())
.osVersionIsBefore(target.version())
+ .matching(node -> canUpgradeAt(now, node))
.byIncreasingOsVersion()
.first(numberToUpgrade);
if (nodesToUpgrade.size() == 0) return;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
index 5310ef339ed..4140de76368 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java
@@ -2,6 +2,9 @@
package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+
+import java.time.Instant;
/**
* Interface for an OS upgrader.
@@ -16,4 +19,9 @@ public interface OsUpgrader {
/** Disable OS upgrade for all nodes of given type */
void disableUpgrade(NodeType type);
+ /**
+  * Returns whether node can upgrade at given instant.
+  * This default implementation imposes no restriction and always returns true.
+  */
+ default boolean canUpgradeAt(Instant instant, Node node) { return true; }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
index 7c6d1cb69db..440046ab818 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
@@ -84,7 +84,7 @@ public class OsVersions {
Version target = Optional.ofNullable(change.targets().get(nodeType))
.map(OsVersionTarget::version)
.orElse(Version.emptyVersion);
- chooseUpgrader(nodeType, target).disableUpgrade(nodeType);
+ chooseUpgrader(nodeType, Optional.of(target)).disableUpgrade(nodeType);
return change.withoutTarget(nodeType);
});
}
@@ -120,7 +120,7 @@ public class OsVersions {
try (Lock lock = db.lockOsVersionChange()) {
OsVersionTarget target = readChange().targets().get(nodeType);
if (target == null) return; // No target set for this type
- OsUpgrader upgrader = chooseUpgrader(nodeType, target.version());
+ OsUpgrader upgrader = chooseUpgrader(nodeType, Optional.of(target.version()));
if (resume) {
upgrader.upgradeTo(target);
} else {
@@ -129,17 +129,23 @@ public class OsVersions {
}
}
+ /**
+  * Returns whether node can be upgraded now, as decided by the upgrader chosen for the type of the node
+  * when no target version is given.
+  */
+ public boolean canUpgrade(Node node) {
+     OsUpgrader upgrader = chooseUpgrader(node.type(), Optional.empty());
+     var now = nodeRepository.clock().instant();
+     return upgrader.canUpgradeAt(now, node);
+ }
+
/** Returns the upgrader to use when upgrading given node type to target */
- private OsUpgrader chooseUpgrader(NodeType nodeType, Version target) {
+ private OsUpgrader chooseUpgrader(NodeType nodeType, Optional<Version> target) {
if (reprovisionToUpgradeOs) {
return new RetiringOsUpgrader(nodeRepository);
}
// Require rebuild if we have any nodes of this type on a major version lower than target
- boolean rebuildRequired = nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream()
+ boolean rebuildRequired = target.isPresent() &&
+ nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream()
.map(Node::status)
.map(Status::osVersion)
.anyMatch(osVersion -> osVersion.current().isPresent() &&
- osVersion.current().get().getMajor() < target.getMajor());
+ osVersion.current().get().getMajor() < target.get().getMajor());
if (rebuildRequired) {
return new RebuildingOsUpgrader(nodeRepository);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
index efc377e6cc3..f96effe9e10 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -47,7 +47,7 @@ public class RebuildingOsUpgrader implements OsUpgrader {
public void upgradeTo(OsVersionTarget target) {
NodeList allNodes = nodeRepository.nodes().list();
Instant now = nodeRepository.clock().instant();
- rebuildableHosts(target, allNodes).forEach(host -> rebuild(host, target.version(), now));
+ rebuildableHosts(target, allNodes, now).forEach(host -> rebuild(host, target.version(), now));
}
@Override
@@ -62,7 +62,7 @@ public class RebuildingOsUpgrader implements OsUpgrader {
return Math.max(0, limit - hostsOfType.rebuilding().size());
}
- private List<Node> rebuildableHosts(OsVersionTarget target, NodeList allNodes) {
+ private List<Node> rebuildableHosts(OsVersionTarget target, NodeList allNodes, Instant now) {
NodeList hostsOfTargetType = allNodes.nodeType(target.nodeType());
int rebuildLimit = rebuildLimit(target.nodeType(), hostsOfTargetType);
@@ -76,6 +76,7 @@ public class RebuildingOsUpgrader implements OsUpgrader {
NodeList candidates = hostsOfTargetType.state(Node.State.active)
.not().rebuilding()
.osVersionIsBefore(target.version())
+ .matching(node -> canUpgradeAt(now, node))
.byIncreasingOsVersion();
for (Node host : candidates) {
if (hostsToRebuild.size() == rebuildLimit) break;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index d923c78a929..79b7441cc34 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -26,6 +26,9 @@ public class RetiringOsUpgrader implements OsUpgrader {
private static final Logger LOG = Logger.getLogger(RetiringOsUpgrader.class.getName());
+ /** The duration this leaves new nodes alone before scheduling any upgrade */
+ static final Duration GRACE_PERIOD = Duration.ofDays(30);
+
protected final NodeRepository nodeRepository;
public RetiringOsUpgrader(NodeRepository nodeRepository) {
@@ -33,21 +36,27 @@ public class RetiringOsUpgrader implements OsUpgrader {
}
@Override
- public final void upgradeTo(OsVersionTarget target) {
+ public void upgradeTo(OsVersionTarget target) {
NodeList allNodes = nodeRepository.nodes().list();
Instant now = nodeRepository.clock().instant();
NodeList candidates = candidates(now, target, allNodes);
candidates.not().deprovisioning()
+ .matching(node -> canUpgradeAt(now, node))
.byIncreasingOsVersion()
.first(1)
.forEach(node -> deprovision(node, target.version(), now));
}
@Override
- public final void disableUpgrade(NodeType type) {
+ public void disableUpgrade(NodeType type) {
// No action needed in this implementation.
}
+ /** Returns whether the age of the given node, measured at the given instant, is longer than the grace period */
+ @Override
+ public boolean canUpgradeAt(Instant instant, Node node) {
+     Duration age = node.history().age(instant);
+     return age.compareTo(GRACE_PERIOD) > 0;
+ }
+
/** Returns nodes that are candidates for upgrade */
private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ArchiveUris.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ArchiveUris.java
index 1cd8bb0fccd..e057fabc4fc 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ArchiveUris.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ArchiveUris.java
@@ -7,11 +7,11 @@ import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
-
import java.time.Duration;
import java.util.Map;
import java.util.Optional;
import java.util.TreeMap;
+import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
@@ -63,7 +63,7 @@ public class ArchiveUris {
}));
}
- /** Set the docker image for nodes of given type */
+ /** Set (or remove, if archiveUri is empty) the archive URI to use for given tenant */
public void setArchiveUri(TenantName tenant, Optional<String> archiveUri) {
try (Lock lock = db.lockArchiveUris()) {
Map<TenantName, String> archiveUris = new TreeMap<>(db.readArchiveUris());
@@ -73,7 +73,8 @@ public class ArchiveUris {
() -> archiveUris.remove(tenant));
db.writeArchiveUris(archiveUris);
this.archiveUris.invalidate(); // Throw away current cache
- log.info("Set archive URI for " + tenant + " to " + archiveUri.orElse(null));
+ log.log(Level.FINE, () -> archiveUri.map(s -> "Set archive URI for " + tenant + " to " + s)
+ .orElseGet(() -> "Remove archive URI for " + tenant));
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 64865c15529..065293ca5d8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -27,7 +27,7 @@ import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
import com.yahoo.vespa.hosted.provision.autoscale.AllocationOptimizer;
import com.yahoo.vespa.hosted.provision.autoscale.ClusterModel;
import com.yahoo.vespa.hosted.provision.autoscale.Limits;
-import com.yahoo.vespa.hosted.provision.autoscale.ResourceTarget;
+import com.yahoo.vespa.hosted.provision.autoscale.Load;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter;
import com.yahoo.vespa.hosted.provision.node.filter.NodeHostFilter;
@@ -171,7 +171,7 @@ public class NodeRepositoryProvisioner implements Provisioner {
firstDeployment // start at min, preserve current resources otherwise
? new AllocatableClusterResources(initialResourcesFrom(requested, clusterSpec, application.id()), clusterSpec, nodeRepository)
: new AllocatableClusterResources(nodes.asList(), nodeRepository);
- var clusterModel = new ClusterModel(application, cluster, clusterSpec, nodes, nodeRepository.metricsDb(), nodeRepository.clock());
+ var clusterModel = new ClusterModel(application, clusterSpec, cluster, nodes, nodeRepository.metricsDb(), nodeRepository.clock());
return within(Limits.of(requested), currentResources, firstDeployment, clusterModel);
}
@@ -196,7 +196,7 @@ public class NodeRepositoryProvisioner implements Provisioner {
if (! firstDeployment && currentAsAdvertised.isWithin(limits.min(), limits.max())) return currentAsAdvertised;
// Otherwise, find an allocation that preserves the current resources as well as possible
- return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(current),
+ return allocationOptimizer.findBestAllocation(Load.one(),
current,
clusterModel,
limits)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
index f0b8e77ee56..1c10de8498a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
@@ -62,7 +62,7 @@ public class ApplicationSerializer {
NodeList nodes = applicationNodes.not().retired().cluster(cluster.id());
if (nodes.isEmpty()) return;
ClusterResources currentResources = nodes.toResources();
- Optional<ClusterModel> clusterModel = ClusterModel.create(application, cluster, nodes.clusterSpec(), nodes, metricsDb, nodeRepository.clock());
+ Optional<ClusterModel> clusterModel = ClusterModel.create(application, nodes.clusterSpec(), cluster, nodes, metricsDb, nodeRepository.clock());
Cursor clusterObject = clustersObject.setObject(cluster.id().value());
clusterObject.setString("type", nodes.clusterSpec().type().name());
toSlime(cluster.minResources(), clusterObject.setObject("min"));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java
index 3659166c9da..87a9735f91e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java
@@ -162,6 +162,9 @@ class NodesResponse extends SlimeJsonResponse {
object.setLong("currentRebootGeneration", node.status().reboot().current());
node.status().osVersion().current().ifPresent(version -> object.setString("currentOsVersion", version.toFullString()));
node.status().osVersion().wanted().ifPresent(version -> object.setString("wantedOsVersion", version.toFullString()));
+ if (node.type().isHost()) {
+ object.setBool("deferOsUpgrade", !nodeRepository.osVersions().canUpgrade(node));
+ }
node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong("currentFirmwareCheck", instant.toEpochMilli()));
if (node.type().isHost())
nodeRepository.firmwareChecks().requiredAfter().ifPresent(after -> object.setLong("wantedFirmwareCheck", after.toEpochMilli()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index c5d8b2518e5..b3fbe124493 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -44,6 +44,7 @@ import com.yahoo.vespa.hosted.provision.node.filter.ParentHostFilter;
import com.yahoo.vespa.hosted.provision.restapi.NodesResponse.ResponseType;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.yolean.Exceptions;
+
import javax.inject.Inject;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -453,6 +454,8 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
Slime slime = new Slime();
Cursor root = slime.setObject();
+ root.setDouble("totalCost", stats.totalCost());
+ root.setDouble("totalAllocatedCost", stats.totalAllocatedCost());
toSlime(stats.load(), root.setObject("load"));
toSlime(stats.activeLoad(), root.setObject("activeLoad"));
Cursor applicationsArray = root.setArray("applications");