author     Jon Bratseth <bratseth@oath.com>    2020-06-18 09:10:48 +0200
committer  GitHub <noreply@github.com>         2020-06-18 09:10:48 +0200
commit     c95cd618e0903ce0e5101ecb3593821bb399a9f9 (patch)
tree       667aadb9fc67ab9d8f220ef298ccc82186c13ac2 /node-repository
parent     b0bc165e64dbcbb9e8e357f92f46f8bf1c84dbb8 (diff)
parent     ed30906d441364b95b99f52355d218a085246fa6 (diff)
Merge pull request #13619 from vespa-engine/bratseth/spare-capacity-maintainer
Bratseth/spare capacity maintainer
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java | 24
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java | 14
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java | 12
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java | 99
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityReportMaintainer.java | 58
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java | 105
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java | 33
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java | 104
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java | 337
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java | 3
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java | 14
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java | 4
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java (renamed from node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java) | 31
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java | 4
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java | 28
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java | 2
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/HostCapacityResponse.java | 3
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java | 65
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java | 2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java | 4
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java | 1
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java | 24
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java | 2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java | 4
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java | 327
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AllocationVisualizer.java | 4
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java | 4
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java | 2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java (renamed from node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java) | 12
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java | 6
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java | 2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java | 6
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json | 6
39 files changed, 1057 insertions, 301 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index a9861497ca3..b6237886dc7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -128,6 +128,8 @@ public final class Node {
return parentHostname.isPresent() && parentHostname.get().equals(hostname);
}
+ public NodeResources resources() { return flavor.resources(); }
+
/** Returns the flavor of this node */
public Flavor flavor() { return flavor; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 1b2f73a2f5f..cbc5a44ae94 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -32,7 +32,7 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which are retired */
public NodeList retired() {
- return matching(node -> node.allocation().get().membership().retired());
+ return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().retired());
}
/** Returns the subset of nodes that are being deprovisioned */
@@ -42,17 +42,25 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which are removable */
public NodeList removable() {
- return matching(node -> node.allocation().get().isRemovable());
+ return matching(node -> node.allocation().isPresent() && node.allocation().get().isRemovable());
}
/** Returns the subset of nodes having exactly the given resources */
- public NodeList resources(NodeResources resources) { return matching(node -> node.flavor().resources().equals(resources)); }
+ public NodeList resources(NodeResources resources) { return matching(node -> node.resources().equals(resources)); }
+
+ /** Returns the subset of nodes which satisfy the given resources */
+ public NodeList satisfies(NodeResources resources) { return matching(node -> node.resources().satisfies(resources)); }
/** Returns the subset of nodes of the given flavor */
public NodeList flavor(String flavor) {
return matching(node -> node.flavor().name().equals(flavor));
}
+ /** Returns the subset of nodes not in the given collection */
+ public NodeList except(Collection<Node> nodes) {
+ return matching(node -> ! nodes.contains(node));
+ }
+
/** Returns the subset of nodes assigned to the given cluster type */
public NodeList type(ClusterSpec.Type type) {
return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().type().equals(type));
@@ -109,6 +117,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
return matching(node -> nodeTypes.contains(node.type()));
}
+ /** Returns the subset of nodes of the host type */
+ public NodeList hosts() {
+ return matching(node -> node.type() == NodeType.host);
+ }
+
/** Returns the subset of nodes that are parents */
public NodeList parents() {
return matching(n -> n.parentHostname().isEmpty());
@@ -133,6 +146,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
return matching(node -> nodeStates.contains(node.state()));
}
+ /** Returns the subset of nodes which wantToRetire set true */
+ public NodeList wantToRetire() {
+ return matching((node -> node.status().wantToRetire()));
+ }
+
/** Returns the parent nodes of the given child nodes */
public NodeList parentsOf(Collection<Node> children) {
return children.stream()
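(A minimal usage sketch, not part of the diff: the new hosts(), satisfies() and except() filters are meant to be chained, as SpareCapacityMaintainer does further down; allNodes, needed and spareHosts are assumed locals.)

    // allNodes: NodeList from nodeRepository.list(); needed: NodeResources; spareHosts: Set<Node> -- all assumed
    List<Node> candidates = allNodes.hosts()             // keep only NodeType.host
                                    .satisfies(needed)   // hosts whose resources can fit 'needed'
                                    .except(spareHosts)  // leave the reserved spare hosts alone
                                    .asList();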
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index b41820a461b..bec35e7ee4f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -102,7 +102,7 @@ public class NodeRepository extends AbstractComponent {
private final DockerImages dockerImages;
private final JobControl jobControl;
private final Applications applications;
- private final boolean canProvisionHostsWhenRequired;
+ private final boolean canProvisionHosts;
/**
* Creates a node repository from a zookeeper provider.
@@ -136,7 +136,7 @@ public class NodeRepository extends AbstractComponent {
NameResolver nameResolver,
DockerImage dockerImage,
boolean useCuratorClientCache,
- boolean canProvisionHostsWhenRequired) {
+ boolean canProvisionHosts) {
this.db = new CuratorDatabaseClient(flavors, curator, clock, zone, useCuratorClientCache);
this.zone = zone;
this.clock = clock;
@@ -149,7 +149,7 @@ public class NodeRepository extends AbstractComponent {
this.dockerImages = new DockerImages(db, dockerImage);
this.jobControl = new JobControl(db);
this.applications = new Applications(db);
- this.canProvisionHostsWhenRequired = canProvisionHostsWhenRequired;
+ this.canProvisionHosts = canProvisionHosts;
// read and write all nodes to make sure they are stored in the latest version of the serialized format
for (State state : State.values())
@@ -800,16 +800,14 @@ public class NodeRepository extends AbstractComponent {
if (host.status().wantToRetire()) return false;
if (host.allocation().map(alloc -> alloc.membership().retired()).orElse(false)) return false;
- if ( canProvisionHostsWhenRequired())
+ if ( canProvisionHosts())
return EnumSet.of(State.active, State.ready, State.provisioned).contains(host.state());
else
return host.state() == State.active;
}
- /** Returns whether this has the ability to conjure hosts when required */
- public boolean canProvisionHostsWhenRequired() {
- return canProvisionHostsWhenRequired;
- }
+ /** Returns whether this repository can provision hosts on demand */
+ public boolean canProvisionHosts() { return canProvisionHosts; }
/** Returns the time keeper of this system */
public Clock clock() { return clock; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index e6cbddf96f2..267bfefa332 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -47,7 +47,7 @@ public class AllocatableClusterResources {
this.nodes = nodes.size();
this.groups = (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
this.realResources = averageRealResourcesOf(nodes, nodeRepository); // Average since we average metrics over nodes
- this.advertisedResources = nodes.get(0).flavor().resources();
+ this.advertisedResources = nodes.get(0).resources();
this.clusterType = nodes.get(0).allocation().get().membership().cluster().type();
this.fulfilment = 1;
}
@@ -125,11 +125,11 @@ public class AllocatableClusterResources {
NodeResources sum = new NodeResources(0, 0, 0, 0);
for (Node node : nodes)
sum = sum.add(nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).justNumbers());
- return nodes.get(0).flavor().resources().justNonNumbers()
- .withVcpu(sum.vcpu() / nodes.size())
- .withMemoryGb(sum.memoryGb() / nodes.size())
- .withDiskGb(sum.diskGb() / nodes.size())
- .withBandwidthGbps(sum.bandwidthGbps() / nodes.size());
+ return nodes.get(0).resources().justNonNumbers()
+ .withVcpu(sum.vcpu() / nodes.size())
+ .withMemoryGb(sum.memoryGb() / nodes.size())
+ .withDiskGb(sum.diskGb() / nodes.size())
+ .withBandwidthGbps(sum.bandwidthGbps() / nodes.size());
}
public static Optional<AllocatableClusterResources> from(ClusterResources wantedResources,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index fa8e8375e23..c32b7854d4e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -85,7 +85,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
int currentGroups = (int)clusterNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type();
log.info("Autoscaling " + application + " " + clusterType + " " + clusterId + ":" +
- "\nfrom " + toString(clusterNodes.size(), currentGroups, clusterNodes.get(0).flavor().resources()) +
+ "\nfrom " + toString(clusterNodes.size(), currentGroups, clusterNodes.get(0).resources()) +
"\nto " + toString(target.nodes(), target.groups(), target.nodeResources()));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
index ca8399da629..f583728f9b8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
@@ -6,11 +6,15 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Allocation;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
+/**
+ * @author mgimle
+ */
public class CapacityChecker {
private List<Node> hosts;
@@ -42,15 +46,15 @@ public class CapacityChecker {
}
public List<Node> nodesFromHostnames(List<String> hostnames) {
- List<Node> nodes = hostnames.stream()
- .filter(h -> nodeMap.containsKey(h))
- .map(h -> nodeMap.get(h))
- .collect(Collectors.toList());
+ List<Node> nodes = hostnames.stream().filter(h -> nodeMap.containsKey(h))
+ .map(h -> nodeMap.get(h))
+ .collect(Collectors.toList());
+
if (nodes.size() != hostnames.size()) {
Set<String> notFoundNodes = new HashSet<>(hostnames);
notFoundNodes.removeAll(nodes.stream().map(Node::hostname).collect(Collectors.toList()));
throw new IllegalArgumentException(String.format("Host(s) not found: [ %s ]",
- String.join(", ", notFoundNodes)));
+ String.join(", ", notFoundNodes)));
}
return nodes;
@@ -92,9 +96,9 @@ public class CapacityChecker {
if (hosts.size() == 0) return Optional.empty();
List<Node> parentRemovalPriorityList = heuristic.entrySet().stream()
- .sorted(Comparator.comparingInt(Map.Entry::getValue))
- .map(Map.Entry::getKey)
- .collect(Collectors.toList());
+ .sorted(this::hostMitigationOrder)
+ .map(Map.Entry::getKey)
+ .collect(Collectors.toList());
for (int i = 1; i <= parentRemovalPriorityList.size(); i++) {
List<Node> hostsToRemove = parentRemovalPriorityList.subList(0, i);
@@ -110,18 +114,25 @@ public class CapacityChecker {
throw new IllegalStateException("No path to failure found. This should be impossible!");
}
+ private int hostMitigationOrder(Map.Entry<Node, Integer> entry1, Map.Entry<Node, Integer> entry2) {
+ int result = Integer.compare(entry1.getValue(), entry2.getValue());
+ if (result != 0) return result;
+ // Mitigate the largest hosts first
+ return NodeResourceComparator.defaultOrder().compare(entry2.getKey().resources(), entry1.getKey().resources());
+ }
+
private Map<String, Node> constructHostnameToNodeMap(List<Node> nodes) {
return nodes.stream().collect(Collectors.toMap(Node::hostname, n -> n));
}
private Map<Node, List<Node>> constructNodeChildrenMap(List<Node> tenants, List<Node> hosts, Map<String, Node> hostnameToNode) {
Map<Node, List<Node>> nodeChildren = tenants.stream()
- .filter(n -> n.parentHostname().isPresent())
- .filter(n -> hostnameToNode.containsKey(n.parentHostname().get()))
- .collect(Collectors.groupingBy(
- n -> hostnameToNode.get(n.parentHostname().orElseThrow())));
+ .filter(n -> n.parentHostname().isPresent())
+ .filter(n -> hostnameToNode.containsKey(n.parentHostname().get()))
+ .collect(Collectors.groupingBy(n -> hostnameToNode.get(n.parentHostname().orElseThrow())));
- for (var host : hosts) nodeChildren.putIfAbsent(host, List.of());
+ for (var host : hosts)
+ nodeChildren.putIfAbsent(host, List.of());
return nodeChildren;
}
@@ -133,7 +144,7 @@ public class CapacityChecker {
int occupiedIps = 0;
Set<String> ipPool = host.ipAddressPool().asSet();
for (var child : nodeChildren.get(host)) {
- hostResources = hostResources.subtract(child.flavor().resources().justNumbers());
+ hostResources = hostResources.subtract(child.resources().justNumbers());
occupiedIps += child.ipAddresses().stream().filter(ipPool::contains).count();
}
availableResources.put(host, new AllocationResources(hostResources, host.ipAddressPool().asSet().size() - occupiedIps));
@@ -149,10 +160,8 @@ public class CapacityChecker {
private Map<Node, Integer> computeMaximalRepeatedRemovals(List<Node> hosts,
Map<Node, List<Node>> nodeChildren,
Map<Node, AllocationResources> availableResources) {
- Map<Node, Integer> timesNodeCanBeRemoved = hosts.stream().collect(Collectors.toMap(
- Function.identity(),
- __ -> Integer.MAX_VALUE
- ));
+ Map<Node, Integer> timesNodeCanBeRemoved = hosts.stream().collect(Collectors.toMap(Function.identity(),
+ __ -> Integer.MAX_VALUE));
for (Node host : hosts) {
List<Node> children = nodeChildren.get(host);
if (children.size() == 0) continue;
@@ -196,7 +205,8 @@ public class CapacityChecker {
/**
* Tests whether it's possible to remove the provided hosts.
* Does not mutate any input variable.
- * @return Empty optional if removal is possible, information on what caused the failure otherwise
+ *
+ * @return empty optional if removal is possible, information on what caused the failure otherwise
*/
private Optional<HostRemovalFailure> findHostRemovalFailure(List<Node> hostsToRemove, List<Node> allHosts,
Map<Node, List<Node>> nodechildren,
@@ -204,20 +214,24 @@ public class CapacityChecker {
var containedAllocations = collateAllocations(nodechildren);
var resourceMap = new HashMap<>(availableResources);
List<Node> validAllocationTargets = allHosts.stream()
- .filter(h -> !hostsToRemove.contains(h))
- .collect(Collectors.toList());
- if (validAllocationTargets.size() == 0) {
+ .filter(h -> !hostsToRemove.contains(h))
+ .collect(Collectors.toList());
+ if (validAllocationTargets.size() == 0)
return Optional.of(HostRemovalFailure.none());
- }
allocationHistory = new AllocationHistory();
for (var host : hostsToRemove) {
Optional<Node> unallocatedNode = tryAllocateNodes(nodechildren.get(host),
- validAllocationTargets, resourceMap, containedAllocations, true);
+ validAllocationTargets,
+ resourceMap,
+ containedAllocations,
+ true);
if (unallocatedNode.isPresent()) {
AllocationFailureReasonList failures = collateAllocationFailures(unallocatedNode.get(),
- validAllocationTargets, resourceMap, containedAllocations);
+ validAllocationTargets,
+ resourceMap,
+ containedAllocations);
return Optional.of(HostRemovalFailure.create(host, unallocatedNode.get(), failures));
}
}
@@ -248,7 +262,7 @@ public class CapacityChecker {
long eligibleParents =
hosts.stream().filter(h ->
!violatesParentHostPolicy(node, h, containedAllocations)
- && availableResources.get(h).satisfies(AllocationResources.from(node.flavor().resources()))).count();
+ && availableResources.get(h).satisfies(AllocationResources.from(node.resources()))).count();
allocationHistory.addEntry(node, newParent.get(), eligibleParents + 1);
}
}
@@ -300,7 +314,7 @@ public class CapacityChecker {
reason.violatesParentHostPolicy = violatesParentHostPolicy(node, host, containedAllocations);
NodeResources l = availableHostResources.nodeResources;
- NodeResources r = node.allocation().map(Allocation::requestedResources).orElse(node.flavor().resources());
+ NodeResources r = node.allocation().map(Allocation::requestedResources).orElse(node.resources());
if (l.vcpu() < r.vcpu())
reason.insufficientVcpu = true;
@@ -326,8 +340,15 @@ public class CapacityChecker {
* as well as the specific host and tenant which caused it.
*/
public static class HostFailurePath {
+
public List<Node> hostsCausingFailure;
public HostRemovalFailure failureReason;
+
+ @Override
+ public String toString() {
+ return "failure path: " + failureReason + " upon removing " + hostsCausingFailure;
+ }
+
}
/**
@@ -336,22 +357,21 @@ public class CapacityChecker {
* will be empty.
*/
public static class HostRemovalFailure {
+
public Optional<Node> host;
public Optional<Node> tenant;
public AllocationFailureReasonList allocationFailures;
public static HostRemovalFailure none() {
- return new HostRemovalFailure(
- Optional.empty(),
- Optional.empty(),
- new AllocationFailureReasonList(List.of()));
+ return new HostRemovalFailure(Optional.empty(),
+ Optional.empty(),
+ new AllocationFailureReasonList(List.of()));
}
public static HostRemovalFailure create(Node host, Node tenant, AllocationFailureReasonList failureReasons) {
- return new HostRemovalFailure(
- Optional.of(host),
- Optional.of(tenant),
- failureReasons);
+ return new HostRemovalFailure(Optional.of(host),
+ Optional.of(tenant),
+ failureReasons);
}
private HostRemovalFailure(Optional<Node> host, Optional<Node> tenant, AllocationFailureReasonList allocationFailures) {
@@ -362,7 +382,7 @@ public class CapacityChecker {
@Override
public String toString() {
- if (host.isEmpty() || tenant.isEmpty()) return "No removal candidates exists.";
+ if (host.isEmpty() || tenant.isEmpty()) return "No removal candidates exists";
return String.format(
"Failure to remove host %s" +
"\n\tNo new host found for tenant %s:" +
@@ -386,7 +406,7 @@ public class CapacityChecker {
if (node.allocation().isPresent())
return from(node.allocation().get().requestedResources());
else
- return from(node.flavor().resources());
+ return from(node.resources());
}
public static AllocationResources from(NodeResources nodeResources) {
@@ -406,6 +426,7 @@ public class CapacityChecker {
public AllocationResources subtract(AllocationResources other) {
return new AllocationResources(this.nodeResources.subtract(other.nodeResources), this.availableIPs - other.availableIPs);
}
+
}
/**
@@ -449,6 +470,7 @@ public class CapacityChecker {
return String.format("[%s]", String.join(", ", reasons));
}
+
}
/**
@@ -487,6 +509,7 @@ public class CapacityChecker {
insufficientVcpu(), insufficientMemoryGb(), insufficientDiskGb(), incompatibleDiskSpeed(),
incompatibleStorageType(), insufficientAvailableIps(), violatesParentHostPolicy());
}
+
}
public static class AllocationHistory {
@@ -506,7 +529,7 @@ public class CapacityChecker {
public String toString() {
return String.format("%-20s %-65s -> %15s [%3d valid]",
tenant.hostname().replaceFirst("\\..+", ""),
- tenant.flavor().resources(),
+ tenant.resources(),
newParent == null ? "x" : newParent.hostname().replaceFirst("\\..+", ""),
this.eligibleParents
);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityReportMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityReportMaintainer.java
deleted file mode 100644
index f6cadabec54..00000000000
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityReportMaintainer.java
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.provision.maintenance;
-
-import com.yahoo.jdisc.Metric;
-import com.yahoo.vespa.hosted.provision.Node;
-import com.yahoo.vespa.hosted.provision.NodeRepository;
-
-import java.time.Duration;
-import java.util.List;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.stream.Collectors;
-
-/**
- * Performs analysis on the node repository to produce metrics that pertain to the capacity of the node repository.
- * These metrics include:
- * Spare host capacity, or how many hosts the repository can stand to lose without ending up in a situation where it's
- * unable to find a new home for orphaned tenants.
- * Overcommitted hosts, which tracks if there are any hosts whose capacity is less than the sum of its children's.
- *
- * @author mgimle
- */
-public class CapacityReportMaintainer extends NodeRepositoryMaintainer {
-
- private final Metric metric;
- private final NodeRepository nodeRepository;
- private static final Logger log = Logger.getLogger(CapacityReportMaintainer.class.getName());
-
- CapacityReportMaintainer(NodeRepository nodeRepository,
- Metric metric,
- Duration interval) {
- super(nodeRepository, interval);
- this.nodeRepository = nodeRepository;
- this.metric = Objects.requireNonNull(metric);
- }
-
- @Override
- protected void maintain() {
- if (nodeRepository.zone().getCloud().dynamicProvisioning()) return; // Hosts and nodes are 1-1
-
- CapacityChecker capacityChecker = new CapacityChecker(this.nodeRepository);
- List<Node> overcommittedHosts = capacityChecker.findOvercommittedHosts();
- if (overcommittedHosts.size() != 0) {
- log.log(Level.WARNING, String.format("%d nodes are overcommitted! [ %s ]", overcommittedHosts.size(),
- overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));
- }
- metric.set("overcommittedHosts", overcommittedHosts.size(), null);
-
- Optional<CapacityChecker.HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
- if (failurePath.isPresent()) {
- int worstCaseHostLoss = failurePath.get().hostsCausingFailure.size();
- metric.set("spareHostCapacity", worstCaseHostLoss - 1, null);
- }
- }
-
-}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index 18471637da7..4e1be9c486c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -7,10 +7,13 @@ import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.TransientException;
import com.yahoo.jdisc.Metric;
+
+import java.util.Objects;
import java.util.logging.Level;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.yolean.Exceptions;
import java.io.Closeable;
@@ -128,4 +131,106 @@ class MaintenanceDeployment implements Closeable {
return "deployment of " + application;
}
+ public static class Move {
+
+ private final Node node;
+ private final Node fromHost, toHost;
+
+ Move(Node node, Node fromHost, Node toHost) {
+ this.node = node;
+ this.fromHost = fromHost;
+ this.toHost = toHost;
+ }
+
+ public Node node() { return node; }
+ public Node fromHost() { return fromHost; }
+ public Node toHost() { return toHost; }
+
+ /**
+ * Try to deploy to make this move.
+ *
+ * @param verifyTarget true to only make this move if the node ends up at the expected target host,
+ * false if we should perform it as long as it moves from the source host
+ * @return true if the move was done, false if it couldn't be
+ */
+ public boolean execute(boolean verifyTarget,
+ Agent agent, Deployer deployer, Metric metric, NodeRepository nodeRepository) {
+ if (isEmpty()) return false;
+ ApplicationId application = node.allocation().get().owner();
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) {
+ if ( ! deployment.isValid()) return false;
+
+ boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository);
+ if ( ! couldMarkRetiredNow) return false;
+
+ Optional<Node> expectedNewNode = Optional.empty();
+ try {
+ if ( ! deployment.prepare()) return false;
+ if (verifyTarget) {
+ expectedNewNode =
+ nodeRepository.getNodes(application, Node.State.reserved).stream()
+ .filter(n -> !n.hostname().equals(node.hostname()))
+ .filter(n -> n.allocation().get().membership().cluster().id().equals(node.allocation().get().membership().cluster().id()))
+ .findAny();
+ if (expectedNewNode.isEmpty()) return false;
+ if (!expectedNewNode.get().hasParent(toHost.hostname())) return false;
+ }
+ if ( ! deployment.activate()) return false;
+
+ log.info(agent + " redeployed " + application + " to " +
+ ( verifyTarget ? this : "move " + (node.hostname() + " from " + fromHost)));
+ return true;
+ }
+ finally {
+ markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
+
+ // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
+ expectedNewNode.flatMap(node -> nodeRepository.getNode(node.hostname(), Node.State.reserved))
+ .ifPresent(node -> nodeRepository.setDirty(node, agent, "Expired by " + agent));
+ }
+ }
+ }
+
+ /** Returns true only if this operation changes the state of the wantToRetire flag */
+ private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) {
+ try (Mutex lock = nodeRepository.lock(node)) {
+ Optional<Node> nodeToMove = nodeRepository.getNode(node.hostname());
+ if (nodeToMove.isEmpty()) return false;
+ if (nodeToMove.get().state() != Node.State.active) return false;
+
+ if (nodeToMove.get().status().wantToRetire() == wantToRetire) return false;
+
+ nodeRepository.write(nodeToMove.get().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), lock);
+ return true;
+ }
+ }
+
+ public boolean isEmpty() { return node == null; }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(node, fromHost, toHost);
+ }
+
+ public boolean equals(Object o) {
+ if (o == this) return true;
+ if (o == null || o.getClass() != this.getClass()) return false;
+
+ Move other = (Move)o;
+ if ( ! Objects.equals(other.node, this.node)) return false;
+ if ( ! Objects.equals(other.fromHost, this.fromHost)) return false;
+ if ( ! Objects.equals(other.toHost, this.toHost)) return false;
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return "move " +
+ ( isEmpty() ? "none" : (node.hostname() + " from " + fromHost + " to " + toHost));
+ }
+
+ public static Move empty() { return new Move(null, null, null); }
+
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index afd9ad3ffa3..4323622df8b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -47,7 +47,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final InfrastructureProvisioner infrastructureProvisioner;
private final Optional<LoadBalancerExpirer> loadBalancerExpirer;
private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer;
- private final CapacityReportMaintainer capacityReportMaintainer;
+ private final SpareCapacityMaintainer spareCapacityMaintainer;
private final OsUpgradeActivator osUpgradeActivator;
private final Rebalancer rebalancer;
private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer;
@@ -88,7 +88,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService));
dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner ->
new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource));
- capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval);
+ spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval);
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
rebalancer = new Rebalancer(deployer, nodeRepository, metric, clock, defaults.rebalancerInterval);
nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval);
@@ -110,7 +110,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
failedExpirer.close();
dirtyExpirer.close();
nodeRebooter.close();
- capacityReportMaintainer.close();
+ spareCapacityMaintainer.close();
provisionedExpirer.close();
metricsReporter.close();
infrastructureProvisioner.close();
@@ -153,7 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration failedExpirerInterval;
private final Duration dirtyExpiry;
private final Duration provisionedExpiry;
- private final Duration capacityReportInterval;
+ private final Duration spareCapacityMaintenanceInterval;
private final Duration metricsInterval;
private final Duration retiredInterval;
private final Duration infrastructureProvisionInterval;
@@ -168,25 +168,24 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final NodeFailer.ThrottlePolicy throttlePolicy;
DefaultTimes(Zone zone) {
- failGrace = Duration.ofMinutes(30);
- periodicRedeployInterval = Duration.ofMinutes(30);
- // Don't redeploy in test environments
- redeployMaintainerInterval = Duration.ofMinutes(1);
- operatorChangeRedeployInterval = Duration.ofMinutes(1);
+ autoscalingInterval = Duration.ofMinutes(5);
+ dynamicProvisionerInterval = Duration.ofMinutes(5);
failedExpirerInterval = Duration.ofMinutes(10);
- provisionedExpiry = Duration.ofHours(4);
- capacityReportInterval = Duration.ofMinutes(10);
- metricsInterval = Duration.ofMinutes(1);
+ failGrace = Duration.ofMinutes(30);
infrastructureProvisionInterval = Duration.ofMinutes(1);
- throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
loadBalancerExpirerInterval = Duration.ofMinutes(5);
- reservationExpiry = Duration.ofMinutes(15); // Need to be long enough for deployment to be finished for all config model versions
- dynamicProvisionerInterval = Duration.ofMinutes(5);
+ metricsInterval = Duration.ofMinutes(1);
+ nodeMetricsCollectionInterval = Duration.ofMinutes(1);
+ operatorChangeRedeployInterval = Duration.ofMinutes(1);
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
+ periodicRedeployInterval = Duration.ofMinutes(30);
+ provisionedExpiry = Duration.ofHours(4);
rebalancerInterval = Duration.ofMinutes(40);
- nodeMetricsCollectionInterval = Duration.ofMinutes(1);
- autoscalingInterval = Duration.ofMinutes(5);
+ redeployMaintainerInterval = Duration.ofMinutes(1);
+ reservationExpiry = Duration.ofMinutes(15); // Need to be long enough for deployment to be finished for all config model versions
scalingSuggestionsInterval = Duration.ofMinutes(31);
+ spareCapacityMaintenanceInterval = Duration.ofMinutes(10);
+ throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 12990447eee..3df20fa9d08 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -6,16 +6,14 @@ import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
-import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
-import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
+import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
import java.time.Clock;
import java.time.Duration;
-import java.util.Optional;
/**
* @author bratseth
@@ -41,24 +39,19 @@ public class Rebalancer extends NodeRepositoryMaintainer {
@Override
protected void maintain() {
- if (nodeRepository().canProvisionHostsWhenRequired()) return; // All nodes will be allocated on new hosts, so rebalancing makes no sense
- if (nodeRepository().zone().environment().isTest()) return; // Test zones have short lived deployments, no need to rebalance
+ if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return; // Rebalancing not necessary
+ if (nodeRepository().zone().environment().isTest()) return; // Short lived deployments; no need to rebalance
// Work with an unlocked snapshot as this can take a long time and full consistency is not needed
NodeList allNodes = nodeRepository().list();
-
updateSkewMetric(allNodes);
-
if ( ! zoneIsStable(allNodes)) return;
-
- Move bestMove = findBestMove(allNodes);
- if (bestMove == Move.none) return;
- deployTo(bestMove);
+ findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
}
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
private void updateSkewMetric(NodeList allNodes) {
- DockerHostCapacity capacity = new DockerHostCapacity(allNodes, nodeRepository().resourcesCalculator());
+ HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
double totalSkew = 0;
int hostCount = 0;
for (Node host : allNodes.nodeType((NodeType.host)).state(Node.State.active)) {
@@ -80,8 +73,8 @@ public class Rebalancer extends NodeRepositoryMaintainer {
* Returns Move.none if no moves can be made to reduce skew.
*/
private Move findBestMove(NodeList allNodes) {
- DockerHostCapacity capacity = new DockerHostCapacity(allNodes, nodeRepository().resourcesCalculator());
- Move bestMove = Move.none;
+ HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
+ Move bestMove = Move.empty();
for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) {
if (node.parentHostname().isEmpty()) continue;
ApplicationId applicationId = node.allocation().get().owner();
@@ -89,82 +82,29 @@ public class Rebalancer extends NodeRepositoryMaintainer {
if (deployedRecently(applicationId)) continue;
for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) {
if (toHost.hostname().equals(node.parentHostname().get())) continue;
- if ( ! capacity.freeCapacityOf(toHost).satisfies(node.flavor().resources())) continue;
+ if ( ! capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue;
double skewReductionAtFromHost = skewReductionByRemoving(node, allNodes.parentOf(node).get(), capacity);
double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity);
double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost;
if (netSkewReduction > bestMove.netSkewReduction)
- bestMove = new Move(node, toHost, netSkewReduction);
+ bestMove = new Move(node, nodeRepository().getNode(node.parentHostname().get()).get(), toHost, netSkewReduction);
}
}
return bestMove;
}
- /** Returns true only if this operation changes the state of the wantToRetire flag */
- private boolean markWantToRetire(Node node, boolean wantToRetire) {
- try (Mutex lock = nodeRepository().lock(node)) {
- Optional<Node> nodeToMove = nodeRepository().getNode(node.hostname());
- if (nodeToMove.isEmpty()) return false;
- if (nodeToMove.get().state() != Node.State.active) return false;
-
- if (nodeToMove.get().status().wantToRetire() == wantToRetire) return false;
-
- nodeRepository().write(nodeToMove.get().withWantToRetire(wantToRetire, Agent.Rebalancer, clock.instant()), lock);
- return true;
- }
- }
-
- /**
- * Try a redeployment to effect the chosen move.
- * If it can be done, that's ok; we'll try this or another move later.
- *
- * @return true if the move was done, false if it couldn't be
- */
- private boolean deployTo(Move move) {
- ApplicationId application = move.node.allocation().get().owner();
- try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
- if ( ! deployment.isValid()) return false;
-
- boolean couldMarkRetiredNow = markWantToRetire(move.node, true);
- if ( ! couldMarkRetiredNow) return false;
-
- Optional<Node> expectedNewNode = Optional.empty();
- try {
- if ( ! deployment.prepare()) return false;
- expectedNewNode =
- nodeRepository().getNodes(application, Node.State.reserved).stream()
- .filter(node -> !node.hostname().equals(move.node.hostname()))
- .filter(node -> node.allocation().get().membership().cluster().id().equals(move.node.allocation().get().membership().cluster().id()))
- .findAny();
- if (expectedNewNode.isEmpty()) return false;
- if ( ! expectedNewNode.get().hasParent(move.toHost.hostname())) return false;
- if ( ! deployment.activate()) return false;
-
- log.info("Rebalancer redeployed " + application + " to " + move);
- return true;
- }
- finally {
- markWantToRetire(move.node, false); // Necessary if this failed, no-op otherwise
-
- // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
- expectedNewNode.flatMap(node -> nodeRepository().getNode(node.hostname(), Node.State.reserved))
- .ifPresent(node -> nodeRepository().setDirty(node, Agent.Rebalancer, "Expired by Rebalancer"));
- }
- }
- }
-
- private double skewReductionByRemoving(Node node, Node fromHost, DockerHostCapacity capacity) {
+ private double skewReductionByRemoving(Node node, Node fromHost, HostCapacity capacity) {
NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost);
double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity);
double skewAfter = Node.skew(fromHost.flavor().resources(), freeHostCapacity.add(node.flavor().resources().justNumbers()));
return skewBefore - skewAfter;
}
- private double skewReductionByAdding(Node node, Node toHost, DockerHostCapacity capacity) {
+ private double skewReductionByAdding(Node node, Node toHost, HostCapacity capacity) {
NodeResources freeHostCapacity = capacity.freeCapacityOf(toHost);
double skewBefore = Node.skew(toHost.flavor().resources(), freeHostCapacity);
- double skewAfter = Node.skew(toHost.flavor().resources(), freeHostCapacity.subtract(node.flavor().resources().justNumbers()));
+ double skewAfter = Node.skew(toHost.flavor().resources(), freeHostCapacity.subtract(node.resources().justNumbers()));
return skewBefore - skewAfter;
}
@@ -176,25 +116,23 @@ public class Rebalancer extends NodeRepositoryMaintainer {
.orElse(true);
}
- private static class Move {
-
- static final Move none = new Move(null, null, 0);
+ private static class Move extends MaintenanceDeployment.Move {
- final Node node;
- final Node toHost;
final double netSkewReduction;
- Move(Node node, Node toHost, double netSkewReduction) {
- this.node = node;
- this.toHost = toHost;
+ Move(Node node, Node fromHost, Node toHost, double netSkewReduction) {
+ super(node, fromHost, toHost);
this.netSkewReduction = netSkewReduction;
}
@Override
public String toString() {
- return "move " +
- ( node == null ? "none" :
- (node.hostname() + " to " + toHost + " [skew reduction " + netSkewReduction + "]"));
+ if (isEmpty()) return "move none";
+ return super.toString() + " [skew reduction " + netSkewReduction + "]";
+ }
+
+ public static Move empty() {
+ return new Move(null, null, null, 0);
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
new file mode 100644
index 00000000000..54899372397
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -0,0 +1,337 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.stream.Collectors;
+
+/**
+ * A maintainer which attempts to ensure there is spare capacity available in chunks which can fit
+ * all node resource configuration in use, such that the system is able to quickly replace a failed node
+ * if necessary.
+ *
+ * This also emits the following metrics:
+ * - Overcommitted hosts: Hosts whose capacity is less than the sum of its children's
+ * - Spare host capacity, or how many hosts the repository can stand to lose without ending up in a situation where it's
+ * unable to find a new home for orphaned tenants.
+ *
+ * @author mgimle
+ * @author bratseth
+ */
+public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
+
+ private final int maxIterations;
+ private final Deployer deployer;
+ private final Metric metric;
+
+ public SpareCapacityMaintainer(Deployer deployer,
+ NodeRepository nodeRepository,
+ Metric metric,
+ Duration interval) {
+ this(deployer, nodeRepository, metric, interval,
+ 10_000 // Should take less than a few minutes
+ );
+ }
+
+ public SpareCapacityMaintainer(Deployer deployer,
+ NodeRepository nodeRepository,
+ Metric metric,
+ Duration interval,
+ int maxIterations) {
+ super(nodeRepository, interval);
+ this.deployer = deployer;
+ this.metric = metric;
+ this.maxIterations = maxIterations;
+ }
+
+ @Override
+ protected void maintain() {
+ if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return;
+
+ CapacityChecker capacityChecker = new CapacityChecker(nodeRepository());
+
+ List<Node> overcommittedHosts = capacityChecker.findOvercommittedHosts();
+ if (overcommittedHosts.size() != 0) {
+ log.log(Level.WARNING, String.format("%d nodes are overcommitted! [ %s ]",
+ overcommittedHosts.size(),
+ overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));
+ }
+ metric.set("overcommittedHosts", overcommittedHosts.size(), null);
+
+ Optional<CapacityChecker.HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
+ if (failurePath.isPresent()) {
+ int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
+ if (spareHostCapacity == 0) {
+ Move move = findMitigatingMove(failurePath.get());
+ if (moving(move)) {
+ // We succeeded or are in the process of taking a step to mitigate.
+ // Report with the assumption this will eventually succeed to avoid alerting before we're stuck
+ spareHostCapacity++;
+ }
+ }
+ metric.set("spareHostCapacity", spareHostCapacity, null);
+ }
+ }
+
+ private boolean moving(Move move) {
+ if (move.isEmpty()) return false;
+ if (move.node().allocation().get().membership().retired()) return true; // Move already in progress
+ return move.execute(false, Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
+ }
+
+ private Move findMitigatingMove(CapacityChecker.HostFailurePath failurePath) {
+ Optional<Node> nodeWhichCantMove = failurePath.failureReason.tenant;
+ if (nodeWhichCantMove.isEmpty()) return Move.empty();
+
+ Node node = nodeWhichCantMove.get();
+ NodeList allNodes = nodeRepository().list();
+ // Allocation will assign the two most empty nodes as "spares", which will not be allocated on
+ // unless needed for node failing. Our goal here is to make room on these spares for the given node
+ HostCapacity hostCapacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
+ Set<Node> spareHosts = hostCapacity.findSpareHosts(allNodes.hosts().satisfies(node.resources()).asList(), 2);
+ List<Node> hosts = allNodes.hosts().except(spareHosts).asList();
+
+ CapacitySolver capacitySolver = new CapacitySolver(hostCapacity, maxIterations);
+ List<Move> shortestMitigation = null;
+ for (Node spareHost : spareHosts) {
+ List<Move> mitigation = capacitySolver.makeRoomFor(node, spareHost, hosts, List.of(), List.of());
+ if (mitigation == null) continue;
+ if (shortestMitigation == null || shortestMitigation.size() > mitigation.size())
+ shortestMitigation = mitigation;
+ }
+ if (shortestMitigation == null || shortestMitigation.isEmpty()) return Move.empty();
+ return shortestMitigation.get(0);
+ }
+
+ private static class CapacitySolver {
+
+ private final HostCapacity hostCapacity;
+ private final int maxIterations;
+
+ private int iterations = 0;
+
+ CapacitySolver(HostCapacity hostCapacity, int maxIterations) {
+ this.hostCapacity = hostCapacity;
+ this.maxIterations = maxIterations;
+ }
+
+ /** The map of subproblem solutions already found. The value is null when there is no solution. */
+ private Map<SolutionKey, List<Move>> solutions = new HashMap<>();
+
+ /**
+ * Finds the shortest sequence of moves which makes room for the given node on the given host,
+ * assuming the given moves already made over the given hosts' current allocation.
+ *
+ * @param node the node to make room for
+ * @param host the target host to make room on
+ * @param hosts the hosts onto which we can move nodes
+ * @param movesConsidered the moves already being considered to add as part of this scenario
+ * (after any moves made by this)
+ * @param movesMade the moves already made in this scenario
+ * @return the list of movesMade with the moves needed for this appended, in the order they should be performed,
+ * or null if no sequence could be found
+ */
+ List<Move> makeRoomFor(Node node, Node host, List<Node> hosts, List<Move> movesConsidered, List<Move> movesMade) {
+ SolutionKey solutionKey = new SolutionKey(node, host, movesConsidered, movesMade);
+ List<Move> solution = solutions.get(solutionKey);
+ if (solution == null) {
+ solution = findRoomFor(node, host, hosts, movesConsidered, movesMade);
+ solutions.put(solutionKey, solution);
+ }
+ return solution;
+ }
+
+ private List<Move> findRoomFor(Node node, Node host, List<Node> hosts,
+ List<Move> movesConsidered, List<Move> movesMade) {
+ if (iterations++ > maxIterations)
+ return null;
+
+ if ( ! host.resources().satisfies(node.resources())) return null;
+ NodeResources freeCapacity = freeCapacityWith(movesMade, host);
+ if (freeCapacity.satisfies(node.resources())) return List.of();
+
+ List<Move> shortest = null;
+ for (var i = subsets(hostCapacity.allNodes().childrenOf(host), 5); i.hasNext(); ) {
+ List<Node> childrenToMove = i.next();
+ if ( ! addResourcesOf(childrenToMove, freeCapacity).satisfies(node.resources())) continue;
+ List<Move> moves = move(childrenToMove, host, hosts, movesConsidered, movesMade);
+ if (moves == null) continue;
+
+ if (shortest == null || moves.size() < shortest.size())
+ shortest = moves;
+ }
+ if (shortest == null) return null;
+ return append(movesMade, shortest);
+ }
+
+ private List<Move> move(List<Node> nodes, Node host, List<Node> hosts, List<Move> movesConsidered, List<Move> movesMade) {
+ List<Move> moves = new ArrayList<>();
+ for (Node childToMove : nodes) {
+ List<Move> childMoves = move(childToMove, host, hosts, movesConsidered, append(movesMade, moves));
+ if (childMoves == null) return null;
+ moves.addAll(childMoves);
+ }
+ return moves;
+ }
+
+ private List<Move> move(Node node, Node host, List<Node> hosts, List<Move> movesConsidered, List<Move> movesMade) {
+ if (contains(node, movesConsidered)) return null;
+ if (contains(node, movesMade)) return null;
+ List<Move> shortest = null;
+ for (Node target : hosts) {
+ if (target.equals(host)) continue;
+ Move move = new Move(node, host, target);
+ List<Move> childMoves = makeRoomFor(node, target, hosts, append(movesConsidered, move), movesMade);
+ if (childMoves == null) continue;
+ if (shortest == null || shortest.size() > childMoves.size() + 1) {
+ shortest = new ArrayList<>(childMoves);
+ shortest.add(move);
+ }
+ }
+ return shortest;
+ }
+
+ private boolean contains(Node node, List<Move> moves) {
+ return moves.stream().anyMatch(move -> move.node().equals(node));
+ }
+
+ private NodeResources addResourcesOf(List<Node> nodes, NodeResources resources) {
+ for (Node node : nodes)
+ resources = resources.add(node.resources());
+ return resources;
+ }
+
+ private Iterator<List<Node>> subsets(NodeList nodes, int maxSize) {
+ return new SubsetIterator(nodes.asList(), maxSize);
+ }
+
+ private List<Move> append(List<Move> a, List<Move> b) {
+ List<Move> list = new ArrayList<>();
+ list.addAll(a);
+ list.addAll(b);
+ return list;
+ }
+
+ private List<Move> append(List<Move> moves, Move move) {
+ List<Move> list = new ArrayList<>(moves);
+ list.add(move);
+ return list;
+ }
+
+ private NodeResources freeCapacityWith(List<Move> moves, Node host) {
+ NodeResources resources = hostCapacity.freeCapacityOf(host);
+ for (Move move : moves) {
+ if ( ! move.toHost().equals(host)) continue;
+ resources = resources.subtract(move.node().resources());
+ }
+ for (Move move : moves) {
+ if ( ! move.fromHost().equals(host)) continue;
+ resources = resources.add(move.node().resources());
+ }
+ return resources;
+ }
+
+ }
+
+ private static class SolutionKey {
+
+ private final Node node;
+ private final Node host;
+ private final List<Move> movesConsidered;
+ private final List<Move> movesMade;
+
+ private final int hash;
+
+ public SolutionKey(Node node, Node host, List<Move> movesConsidered, List<Move> movesMade) {
+ this.node = node;
+ this.host = host;
+ this.movesConsidered = movesConsidered;
+ this.movesMade = movesMade;
+
+ hash = Objects.hash(node, host, movesConsidered, movesMade);
+ }
+
+ @Override
+ public int hashCode() { return hash; }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) return true;
+ if (o == null || o.getClass() != this.getClass()) return false;
+
+ SolutionKey other = (SolutionKey)o;
+ if ( ! other.node.equals(this.node)) return false;
+ if ( ! other.host.equals(this.host)) return false;
+ if ( ! other.movesConsidered.equals(this.movesConsidered)) return false;
+ if ( ! other.movesMade.equals(this.movesMade)) return false;
+ return true;
+ }
+
+ }
+
+ private static class SubsetIterator implements Iterator<List<Node>> {
+
+ private final List<Node> nodes;
+ private final int maxLength;
+
+ // A bit mask whose binary representation determines which items of the list we'll include
+ private int i = 0; // the previous mask; starting at 0 means the empty set is skipped
+ private List<Node> next = null;
+
+ public SubsetIterator(List<Node> nodes, int maxLength) {
+ this.nodes = new ArrayList<>(nodes.subList(0, Math.min(nodes.size(), 31)));
+ this.maxLength = maxLength;
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (next != null) return true;
+
+ // find next
+ while (++i < 1<<nodes.size()) {
+ int ones = Integer.bitCount(i);
+ if (ones > maxLength) continue;
+
+ next = new ArrayList<>(ones);
+ for (int position = 0; position < nodes.size(); position++) {
+ if (hasOneAtPosition(position, i))
+ next.add(nodes.get(position));
+ }
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public List<Node> next() {
+ if ( ! hasNext()) throw new IllegalStateException("No more elements");
+ var current = next;
+ next = null;
+ return current;
+ }
+
+ private boolean hasOneAtPosition(int position, int number) {
+ return (number & (1 << position)) > 0;
+ }
+
+ }
+
+}
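The SubsetIterator added above enumerates candidate groups of children by treating an int counter as a bit mask over the node list. Below is a minimal, self-contained sketch of that enumeration technique, using plain strings instead of Node; the class and method names here are illustrative only and not part of the patch.

import java.util.ArrayList;
import java.util.List;

public class SubsetEnumerationSketch {

    /** Returns all non-empty subsets of items with at most maxSize elements. */
    static List<List<String>> subsets(List<String> items, int maxSize) {
        List<List<String>> result = new ArrayList<>();
        // Each value of i is a bit mask: bit p set means items.get(p) is included.
        // Starting at 1 skips the empty set, mirroring the iterator above.
        for (int i = 1; i < (1 << items.size()); i++) {
            if (Integer.bitCount(i) > maxSize) continue;
            List<String> subset = new ArrayList<>();
            for (int p = 0; p < items.size(); p++)
                if ((i & (1 << p)) != 0)
                    subset.add(items.get(p));
            result.add(subset);
        }
        return result;
    }

    public static void main(String[] args) {
        // For [a, b, c] with maxSize 2 this prints [a], [b], [a, b], [c], [a, c], [b, c]
        subsets(List.of("a", "b", "c"), 2).forEach(System.out::println);
    }
}

The iterator above likewise bounds the size of its backing list so the shifted mask stays representable in an int.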
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
index 31b7181a58a..eba9e4a1ac9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
@@ -21,6 +21,7 @@ public enum Agent {
ProvisionedExpirer,
ReservationExpirer,
DynamicProvisioningMaintainer,
- RetiringUpgrader;
+ RetiringUpgrader,
+ SpareCapacityMaintainer
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 15be7796187..37842115949 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -386,15 +386,16 @@ public class NodeSerializer {
case "operator" : return Agent.operator;
case "application" : return Agent.application;
case "system" : return Agent.system;
- case "NodeFailer" : return Agent.NodeFailer;
- case "Rebalancer" : return Agent.Rebalancer;
case "DirtyExpirer" : return Agent.DirtyExpirer;
+ case "DynamicProvisioningMaintainer" : return Agent.DynamicProvisioningMaintainer;
case "FailedExpirer" : return Agent.FailedExpirer;
case "InactiveExpirer" : return Agent.InactiveExpirer;
+ case "NodeFailer" : return Agent.NodeFailer;
case "ProvisionedExpirer" : return Agent.ProvisionedExpirer;
+ case "Rebalancer" : return Agent.Rebalancer;
case "ReservationExpirer" : return Agent.ReservationExpirer;
- case "DynamicProvisioningMaintainer" : return Agent.DynamicProvisioningMaintainer;
case "RetiringUpgrader" : return Agent.RetiringUpgrader;
+ case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer;
}
throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'");
}
@@ -403,15 +404,16 @@ public class NodeSerializer {
case operator : return "operator";
case application : return "application";
case system : return "system";
- case NodeFailer : return "NodeFailer";
- case Rebalancer : return "Rebalancer";
case DirtyExpirer : return "DirtyExpirer";
+ case DynamicProvisioningMaintainer : return "DynamicProvisioningMaintainer";
case FailedExpirer : return "FailedExpirer";
case InactiveExpirer : return "InactiveExpirer";
+ case NodeFailer : return "NodeFailer";
case ProvisionedExpirer : return "ProvisionedExpirer";
+ case Rebalancer : return "Rebalancer";
case ReservationExpirer : return "ReservationExpirer";
- case DynamicProvisioningMaintainer : return "DynamicProvisioningMaintainer";
case RetiringUpgrader: return "RetiringUpgrader";
+ case SpareCapacityMaintainer: return "SpareCapacityMaintainer";
}
throw new IllegalArgumentException("Serialized form of '" + agent + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index a61032af276..7158ccc57e3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -184,12 +184,12 @@ class Activator {
for (Node node : nodes) {
HostSpec hostSpec = getHost(node.hostname(), hosts);
node = hostSpec.membership().get().retired() ? node.retire(nodeRepository.clock().instant()) : node.unretire();
- if (! hostSpec.advertisedResources().equals(node.flavor().resources())) // A resized node
+ if (! hostSpec.advertisedResources().equals(node.resources())) // A resized node
node = node.with(new Flavor(hostSpec.advertisedResources()));
Allocation allocation = node.allocation().get()
.with(hostSpec.membership().get())
.withRequestedResources(hostSpec.requestedResources()
- .orElse(node.flavor().resources()));
+ .orElse(node.resources()));
if (hostSpec.networkPorts().isPresent())
allocation = allocation.withNetworkPorts(hostSpec.networkPorts().get());
node = node.with(allocation);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
index 5402e4bf3e8..38dd9f29873 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java
@@ -34,7 +34,7 @@ public class EmptyProvisionServiceProvider implements ProvisionServiceProvider {
private static class IdentityHostResourcesCalculator implements HostResourcesCalculator {
@Override
- public NodeResources realResourcesOf(Node node, NodeRepository repository) { return node.flavor().resources(); }
+ public NodeResources realResourcesOf(Node node, NodeRepository repository) { return node.resources(); }
@Override
public NodeResources advertisedResourcesOf(Flavor flavor) { return flavor.resources(); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index caecf8edf2f..d3e5f60599f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -58,7 +58,7 @@ public class GroupPreparer {
// active config model which is changed on activate
public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes,
List<Node> surplusActiveNodes, MutableInteger highestIndex, int spareCount, int wantedGroups) {
- boolean dynamicProvisioningEnabled = nodeRepository.canProvisionHostsWhenRequired() && nodeRepository.zone().getCloud().dynamicProvisioning();
+ boolean dynamicProvisioningEnabled = nodeRepository.canProvisionHosts() && nodeRepository.zone().getCloud().dynamicProvisioning();
boolean allocateFully = dynamicProvisioningEnabled && preprovisionCapacityFlag.value().isEmpty();
try (Mutex lock = nodeRepository.lock(application)) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java
index b508198db3a..fd16e61417f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java
@@ -3,10 +3,14 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
+import java.util.List;
import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
/**
* Capacity calculation for docker hosts.
@@ -16,17 +20,38 @@ import java.util.Objects;
*
* @author smorgrav
*/
-public class DockerHostCapacity {
+public class HostCapacity {
private final NodeList allNodes;
private final HostResourcesCalculator hostResourcesCalculator;
- public DockerHostCapacity(NodeList allNodes, HostResourcesCalculator hostResourcesCalculator) {
+ public HostCapacity(NodeList allNodes, HostResourcesCalculator hostResourcesCalculator) {
this.allNodes = Objects.requireNonNull(allNodes, "allNodes must be non-null");
this.hostResourcesCalculator = Objects.requireNonNull(hostResourcesCalculator, "hostResourcesCalculator must be non-null");
}
- int compareWithoutInactive(Node hostA, Node hostB) {
+ public NodeList allNodes() { return allNodes; }
+
+ /**
+ * Spare hosts are the hosts in the system with the most free capacity.
+ *
+ * We do not count retired or inactive nodes as used capacity (as they could have been
+ * moved to create space for the spare node in the first place).
+ *
+ * @param candidates the candidates to consider. This list may contain all kinds of nodes.
+ * @param count the max number of spare hosts to return
+ */
+ public Set<Node> findSpareHosts(List<Node> candidates, int count) {
+ return candidates.stream()
+ .filter(node -> node.type() == NodeType.host)
+ .filter(dockerHost -> dockerHost.state() == Node.State.active)
+ .filter(dockerHost -> freeIPs(dockerHost) > 0)
+ .sorted(this::compareWithoutInactive)
+ .limit(count)
+ .collect(Collectors.toSet());
+ }
+
+ private int compareWithoutInactive(Node hostA, Node hostB) {
int result = compare(freeCapacityOf(hostB, true), freeCapacityOf(hostA, true));
if (result != 0) return result;
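The findSpareHosts javadoc above describes the selection rule: among active hosts with free IPs, keep the requested number with the most free capacity. Here is a simplified, self-contained sketch of that stream pattern, using a hypothetical SimpleHost record (Java 16+) in place of Node and HostCapacity; the names are assumptions for illustration only.

import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class SpareHostSelectionSketch {

    /** Stand-in for a host: a free-capacity score plus the two filters the real code applies. */
    record SimpleHost(String name, double freeCapacity, boolean active, int freeIps) { }

    /** Picks the 'count' active hosts with free IPs that have the most free capacity. */
    static Set<SimpleHost> findSpareHosts(List<SimpleHost> candidates, int count) {
        return candidates.stream()
                         .filter(SimpleHost::active)
                         .filter(host -> host.freeIps() > 0)
                         .sorted(Comparator.comparingDouble(SimpleHost::freeCapacity).reversed())
                         .limit(count)
                         .collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        var hosts = List.of(new SimpleHost("host1", 4.0, true, 2),
                            new SimpleHost("host2", 9.0, true, 0),   // excluded: no free IPs
                            new SimpleHost("host3", 7.0, true, 5),
                            new SimpleHost("host4", 6.0, false, 5)); // excluded: not active
        System.out.println(findSpareHosts(hosts, 2));                // host3 and host1
    }
}

Note that the real implementation orders hosts with compareWithoutInactive, which ignores retired and inactive children when computing free capacity; the single score used here is only a stand-in for that comparison.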
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index 47d1b30a8e7..df8a7e45917 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -148,7 +148,7 @@ class NodeAllocation {
}
node.node = offered.allocate(application,
ClusterMembership.from(cluster, highestIndex.add(1)),
- requestedNodes.resources().orElse(node.node.flavor().resources()),
+ requestedNodes.resources().orElse(node.node.resources()),
nodeRepository.clock().instant());
accepted.add(acceptNode(node, false, false));
}
@@ -242,7 +242,7 @@ class NodeAllocation {
Node node = prioritizableNode.node;
if (node.allocation().isPresent()) // Record the currently requested resources
- node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.flavor().resources())));
+ node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources())));
if (! wantToRetire) {
accepted++;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
index 8a15c058ff4..8560dd424e7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
@@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.IP;
-import com.yahoo.vespa.hosted.provision.persistence.NameResolver;
import java.util.EnumSet;
import java.util.HashMap;
@@ -21,7 +20,6 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
-import java.util.function.Predicate;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -39,7 +37,7 @@ public class NodePrioritizer {
private final Map<Node, PrioritizableNode> nodes = new HashMap<>();
private final LockedNodeList allNodes;
- private final DockerHostCapacity capacity;
+ private final HostCapacity capacity;
private final NodeSpec requestedNodes;
private final ApplicationId application;
private final ClusterSpec clusterSpec;
@@ -55,11 +53,11 @@ public class NodePrioritizer {
NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec,
int spares, int wantedGroups, boolean allocateFully, NodeRepository nodeRepository) {
this.allNodes = allNodes;
- this.capacity = new DockerHostCapacity(allNodes, nodeRepository.resourcesCalculator());
+ this.capacity = new HostCapacity(allNodes, nodeRepository.resourcesCalculator());
this.requestedNodes = nodeSpec;
this.clusterSpec = clusterSpec;
this.application = application;
- this.spareHosts = findSpareHosts(allNodes, capacity, spares);
+ this.spareHosts = capacity.findSpareHosts(allNodes.asList(), spares);
this.allocateFully = allocateFully;
this.nodeRepository = nodeRepository;
@@ -83,22 +81,6 @@ public class NodePrioritizer {
this.isDocker = resources(requestedNodes) != null;
}
- /**
- * Spare hosts are the two hosts in the system with the most free capacity.
- *
- * We do not count retired or inactive nodes as used capacity (as they could have been
- * moved to create space for the spare node in the first place).
- */
- private static Set<Node> findSpareHosts(LockedNodeList nodes, DockerHostCapacity capacity, int spares) {
- return nodes.asList().stream()
- .filter(node -> node.type() == NodeType.host)
- .filter(dockerHost -> dockerHost.state() == Node.State.active)
- .filter(dockerHost -> capacity.freeIPs(dockerHost) > 0)
- .sorted(capacity::compareWithoutInactive)
- .limit(spares)
- .collect(Collectors.toSet());
- }
-
/** Returns the list of nodes sorted by PrioritizableNode::compare */
List<PrioritizableNode> prioritize() {
return nodes.values().stream().sorted().collect(Collectors.toList());
@@ -206,8 +188,8 @@ public class NodePrioritizer {
builder.parent(parent).freeParentCapacity(parentCapacity);
if (!isNewNode)
- builder.resizable(!allocateFully
- && requestedNodes.canResize(node.flavor().resources(), parentCapacity, isTopologyChange, currentClusterSize));
+ builder.resizable(! allocateFully
+ && requestedNodes.canResize(node.resources(), parentCapacity, isTopologyChange, currentClusterSize));
if (spareHosts.contains(parent))
builder.violatesSpares(true);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index a8abdc3f38a..9971aae1714 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -139,7 +139,7 @@ public interface NodeSpec {
@Override
public boolean needsResize(Node node) {
- return ! node.flavor().resources().compatibleWith(requestedNodeResources);
+ return ! node.resources().compatibleWith(requestedNodeResources);
}
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java
index 3fc60c1192d..0c1b396c40c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java
@@ -126,7 +126,7 @@ class PrioritizableNode implements Comparable<PrioritizableNode> {
double skewWithoutThis() { return skewWith(zeroResources); }
/** Returns the allocation skew of the parent of this after adding this node to it */
- double skewWithThis() { return skewWith(node.flavor().resources()); }
+ double skewWithThis() { return skewWith(node.resources()); }
private double skewWith(NodeResources resources) {
if (parent.isEmpty()) return 0;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
index aa81aae84fe..a4161a318ab 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java
@@ -43,7 +43,7 @@ public class ApplicationSerializer {
if (nodes.isEmpty()) return;
int groups = (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
- ClusterResources currentResources = new ClusterResources(nodes.size(), groups, nodes.get(0).flavor().resources());
+ ClusterResources currentResources = new ClusterResources(nodes.size(), groups, nodes.get(0).resources());
toSlime(cluster.minResources(), clusterObject.setObject("min"));
toSlime(cluster.maxResources(), clusterObject.setObject("max"));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/HostCapacityResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/HostCapacityResponse.java
index 12a29707303..e28b03d7517 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/HostCapacityResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/HostCapacityResponse.java
@@ -20,6 +20,7 @@ import java.util.Optional;
* @author mgimle
*/
public class HostCapacityResponse extends HttpResponse {
+
private final StringBuilder text;
private final Slime slime;
private final CapacityChecker capacityChecker;
@@ -128,7 +129,7 @@ public class HostCapacityResponse extends HttpResponse {
);
failurePath.failureReason.tenant.ifPresent(tenant -> {
object.setString("failedTenant", tenant.hostname());
- object.setString("failedTenantResources", tenant.flavor().resources().toString());
+ object.setString("failedTenantResources", tenant.resources().toString());
tenant.allocation().ifPresent(allocation ->
object.setString("failedTenantAllocation", allocation.toString())
);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index 5ec5c2c08e8..ae3d6ebf815 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -10,6 +10,9 @@ import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.HostFilter;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
import java.time.Clock;
@@ -27,18 +30,22 @@ import java.util.stream.Collectors;
*/
public class MockDeployer implements Deployer {
+ // For actual deploy mode
private final NodeRepositoryProvisioner provisioner;
private final Map<ApplicationId, ApplicationContext> applications;
- private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>();
+ // For mock-deploying anything, only changing wantToRetire nodes to retired
+ private final NodeRepository nodeRepository;
/** The number of redeployments done to this */
public int redeployments = 0;
+ private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>();
private final Clock clock;
private final ReentrantLock lock = new ReentrantLock();
private boolean failActivate = false;
+ /** Create a mock deployer which returns empty on every deploy request. */
@Inject
@SuppressWarnings("unused")
public MockDeployer() {
@@ -46,15 +53,30 @@ public class MockDeployer implements Deployer {
}
/**
- * Create a mock deployer which contains a substitute for an application repository, fullfilled to
+ * Create a mock deployer which returns a deployment on every request,
+ * and fulfills it by not actually deploying but only changing any wantToRetire nodes
+ * for the application to retired.
+ */
+ public MockDeployer(NodeRepository nodeRepository) {
+ this.provisioner = null;
+ this.applications = Map.of();
+ this.nodeRepository = nodeRepository;
+
+ this.clock = nodeRepository.clock();
+ }
+
+ /**
+ * Create a mock deployer which contains a substitute for an application repository, filled to
* be able to call provision with the right parameters.
*/
public MockDeployer(NodeRepositoryProvisioner provisioner,
Clock clock,
Map<ApplicationId, ApplicationContext> applications) {
this.provisioner = provisioner;
- this.clock = clock;
this.applications = new HashMap<>(applications);
+ this.nodeRepository = null;
+
+ this.clock = clock;
}
public ReentrantLock lock() { return lock; }
@@ -74,9 +96,13 @@ public class MockDeployer implements Deployer {
throw new RuntimeException(e);
}
try {
- return Optional.ofNullable(applications.get(id))
- .map(application -> new MockDeployment(provisioner, application));
- } finally {
+ if (provisioner != null)
+ return Optional.ofNullable(applications.get(id))
+ .map(application -> new MockDeployment(provisioner, application));
+ else
+ return Optional.of(new RetiringOnlyMockDeployment(nodeRepository, id));
+ }
+ finally {
lock.unlock();
}
}
@@ -135,6 +161,33 @@ public class MockDeployer implements Deployer {
}
+ public class RetiringOnlyMockDeployment implements Deployment {
+
+ private final NodeRepository nodeRepository;
+ private final ApplicationId applicationId;
+
+ private RetiringOnlyMockDeployment(NodeRepository nodeRepository, ApplicationId applicationId) {
+ this.nodeRepository = nodeRepository;
+ this.applicationId = applicationId;
+ }
+
+ @Override
+ public void prepare() { }
+
+ @Override
+ public void activate() {
+ redeployments++;
+ lastDeployTimes.put(applicationId, clock.instant());
+
+ for (Node node : nodeRepository.list().owner(applicationId).state(Node.State.active).wantToRetire().asList())
+ nodeRepository.write(node.retire(nodeRepository.clock().instant()), nodeRepository.lock(node));
+ }
+
+ @Override
+ public void restart(HostFilter filter) {}
+
+ }
+
/** An application context which substitutes for an application repository */
public static class ApplicationContext {
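The MockDeployer changes above add a mode where a "deployment" only converts wantToRetire nodes to retired, which is what SpareCapacityMaintainer needs in its tests. A rough, self-contained sketch of that activate behaviour with stand-in types follows; FakeNode and the class name are illustrative, not part of the patch.

import java.util.ArrayList;
import java.util.List;

public class RetiringOnlyDeploymentSketch {

    /** Stand-in for a node: just the two flags the mock deployment cares about. */
    static class FakeNode {
        final String hostname;
        boolean wantToRetire;
        boolean retired;
        FakeNode(String hostname, boolean wantToRetire) { this.hostname = hostname; this.wantToRetire = wantToRetire; }
    }

    /** "Activating" only retires the nodes already flagged wantToRetire, as RetiringOnlyMockDeployment does. */
    static int activate(List<FakeNode> activeNodesOfApplication) {
        int retiredNow = 0;
        for (FakeNode node : activeNodesOfApplication) {
            if (node.wantToRetire && ! node.retired) {
                node.retired = true;
                retiredNow++;
            }
        }
        return retiredNow;
    }

    public static void main(String[] args) {
        List<FakeNode> nodes = new ArrayList<>(List.of(new FakeNode("node1", false),
                                                       new FakeNode("node2", true)));
        System.out.println(activate(nodes)); // 1: only node2 is retired
        System.out.println(activate(nodes)); // 0: node2 is already retired, so nothing more changes
    }
}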
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index cc5c6851a92..a0a44e4f342 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -383,7 +383,7 @@ public class AutoscalingTest {
@Override
public NodeResources realResourcesOf(Node node, NodeRepository nodeRepository) {
- return node.flavor().resources();
+ return node.resources();
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index b0e394c93d3..1137ae5ce2c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -208,9 +208,9 @@ class AutoscalingTester {
@Override
public NodeResources realResourcesOf(Node node, NodeRepository nodeRepository) {
if (zone.getCloud().dynamicProvisioning())
- return node.flavor().resources().withMemoryGb(node.flavor().resources().memoryGb() - 3);
+ return node.resources().withMemoryGb(node.resources().memoryGb() - 3);
else
- return node.flavor().resources();
+ return node.resources();
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
index 5813585554d..5e72cfc53ac 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
@@ -29,6 +29,7 @@ public class CapacityCheckerTest {
var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
assertTrue(failurePath.isPresent());
assertTrue(tester.nodeRepository.getNodes(NodeType.host).containsAll(failurePath.get().hostsCausingFailure));
+ assertEquals(5, failurePath.get().hostsCausingFailure.size());
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
index a6b2f6b15ea..62e9a227109 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
@@ -45,6 +45,7 @@ import java.util.stream.IntStream;
* @author mgimle
*/
public class CapacityCheckerTester {
+
public static final Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
// Components with state
@@ -129,7 +130,7 @@ public class CapacityCheckerTester {
childModel.parentHostname = Optional.of(hostname);
Node childNode = createNodeFromModel(childModel);
- childResources.add(childNode.flavor().resources());
+ childResources.add(childNode.resources());
hosts.add(childNode);
}
@@ -138,8 +139,7 @@ public class CapacityCheckerTester {
.mapToObj(n -> String.format("%04X::%04X", hostindex, n))
.collect(Collectors.toSet());
- NodeResources nr = containingNodeResources(childResources,
- excessCapacity);
+ NodeResources nr = containingNodeResources(childResources, excessCapacity);
Node node = nodeRepository.createNode(hostname, hostname,
new IP.Config(Set.of("::"), availableIps), Optional.empty(),
new Flavor(nr), Optional.empty(), NodeType.host);
@@ -159,7 +159,8 @@ public class CapacityCheckerTester {
Set<String> availableIps = IntStream.range(0, ips)
.mapToObj(n -> String.format("%04X::%04X", hostid, n))
.collect(Collectors.toSet());
- Node node = nodeRepository.createNode(hostname, hostname,
+ Node node = nodeRepository.createNode(hostname,
+ hostname,
new IP.Config(Set.of("::"), availableIps), Optional.empty(),
new Flavor(capacity), Optional.empty(), NodeType.host);
hosts.add(node);
@@ -175,8 +176,8 @@ public class CapacityCheckerTester {
);
createNodes(childrenPerHost, numDistinctChildren, childResources,
- numHosts, hostExcessCapacity, hostExcessIps,
- numEmptyHosts, emptyHostExcessCapacity, emptyHostExcessIps);
+ numHosts, hostExcessCapacity, hostExcessIps,
+ numEmptyHosts, emptyHostExcessCapacity, emptyHostExcessIps);
}
void createNodes(int childrenPerHost, int numDistinctChildren, List<NodeResources> childResources,
int numHosts, NodeResources hostExcessCapacity, int hostExcessIps,
@@ -264,10 +265,11 @@ public class CapacityCheckerTester {
owner = ApplicationId.from(nodeModel.owner.tenant, nodeModel.owner.application, nodeModel.owner.instance);
}
- NodeResources.DiskSpeed diskSpeed;
- NodeResources nr = new NodeResources(nodeModel.minCpuCores, nodeModel.minMainMemoryAvailableGb,
- nodeModel.minDiskAvailableGb, nodeModel.bandwidth * 1000,
- nodeModel.fastDisk ? NodeResources.DiskSpeed.fast : NodeResources.DiskSpeed.slow);
+ NodeResources nr = new NodeResources(nodeModel.minCpuCores,
+ nodeModel.minMainMemoryAvailableGb,
+ nodeModel.minDiskAvailableGb,
+ nodeModel.bandwidth * 1000,
+ nodeModel.fastDisk ? NodeResources.DiskSpeed.fast : NodeResources.DiskSpeed.slow);
Flavor f = new Flavor(nr);
Node node = nodeRepository.createNode(nodeModel.id, nodeModel.hostname,
@@ -275,7 +277,7 @@ public class CapacityCheckerTester {
nodeModel.parentHostname, f, Optional.empty(), nodeModel.type);
if (membership != null) {
- return node.allocate(owner, membership, node.flavor().resources(), Instant.now());
+ return node.allocate(owner, membership, node.resources(), Instant.now());
} else {
return node;
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 9fc2f666d27..727232e5c7c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -223,7 +223,7 @@ public class MetricsReporterTest {
if (tenant.isPresent()) {
Allocation allocation = new Allocation(app(tenant.get()),
ClusterMembership.from("container/id1/0/3", new Version(), Optional.empty()),
- owner.flavor().resources(),
+ owner.resources(),
Generation.initial(),
false);
return Optional.of(allocation);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index e2fd8a8721c..51f70e8b640 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -301,9 +301,9 @@ public class NodeFailerTest {
// Two ready nodes and a ready docker node die, but only 2 of those are failed out
tester.clock.advance(Duration.ofMinutes(180));
- Node dockerNode = ready.stream().filter(node -> node.flavor().resources().equals(newNodeResources)).findFirst().get();
+ Node dockerNode = ready.stream().filter(node -> node.resources().equals(newNodeResources)).findFirst().get();
List<Node> otherNodes = ready.stream()
- .filter(node -> ! node.flavor().resources().equals(newNodeResources))
+ .filter(node -> ! node.resources().equals(newNodeResources))
.collect(Collectors.toList());
tester.allNodesMakeAConfigRequestExcept(otherNodes.get(0), otherNodes.get(2), dockerNode);
tester.failer.run();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
new file mode 100644
index 00000000000..fb84dc0a32a
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
@@ -0,0 +1,327 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterMembership;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeFlavors;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.IP;
+import com.yahoo.vespa.hosted.provision.provisioning.EmptyProvisionServiceProvider;
+import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author bratseth
+ */
+public class SpareCapacityMaintainerTest {
+
+ @Test
+ public void testEmpty() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ }
+
+ @Test
+ public void testOneSpare() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testTwoSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(3, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(2, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testNoSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(0, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testAllWorksAsSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(4, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(5, 50, 500, 0.5), 0);
+ tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(2, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testMoveIsNeeded() {
+ // Moving application id 1 and 2 to the same nodes frees up spares for application 0
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(6, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1), 0);
+ tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2);
+ tester.addNodes(2, 2, new NodeResources(5, 50, 500, 0.5), 4);
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+
+ // Maintaining again is a no-op since the node to move is already retired
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testMultipleMovesAreNeeded() {
+ // Moving application id 1 and 2 to the same nodes frees up spares for application 0
+ // so that it can be moved from size 12 to size 10 hosts, clearing up spare room for the size 12 application
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(4, new NodeResources(12, 120, 1200, 1.2));
+ tester.addHosts(4, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1.0), 0);
+ tester.addNodes(1, 2, new NodeResources(12, 120, 1200, 1.2), 2);
+ tester.addNodes(2, 2, new NodeResources(5, 50, 500, 0.5), 4);
+ tester.addNodes(3, 2, new NodeResources(5, 50, 500, 0.5), 6);
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testMultipleNodesMustMoveFromOneHost() {
+ // By moving the 4 small nodes from host 2 we free up sufficient space on the third host to act as a spare for
+ // application 0
+ var tester = new SpareCapacityMaintainerTester();
+ setupMultipleHosts(tester, 5);
+
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testMultipleNodesMustMoveFromOneHostButInsufficientCapacity() {
+ var tester = new SpareCapacityMaintainerTester();
+ setupMultipleHosts(tester, 4);
+
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(0, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ private void setupMultipleHosts(SpareCapacityMaintainerTester tester, int smallNodeCount) {
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1.0), 0);
+
+ tester.addHosts(1, new NodeResources(16, 160, 1600, 1.6));
+ tester.addNodes(1, 1, new NodeResources(1, 10, 100, 0.1), 2);
+ tester.addNodes(2, 1, new NodeResources(1, 10, 100, 0.1), 2);
+ tester.addNodes(3, 1, new NodeResources(1, 10, 100, 0.1), 2);
+ tester.addNodes(4, 1, new NodeResources(1, 10, 100, 0.1), 2);
+ tester.addNodes(5, 1, new NodeResources(2, 20, 200, 2.0), 2);
+ tester.addNodes(6, 1, new NodeResources(2, 20, 200, 2.0), 2);
+ tester.addNodes(7, 1, new NodeResources(2, 20, 200, 2.0), 2);
+
+ tester.addHosts(smallNodeCount, new NodeResources(2, 20, 200, 2.0));
+ }
+
+ @Test
+ public void testTooManyIterationsAreNeeded() {
+ // 6 nodes must move to the next host, which is more than the max limit
+ var tester = new SpareCapacityMaintainerTester(5);
+
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addHosts(1, new NodeResources(9, 90, 900, 0.9));
+ tester.addHosts(1, new NodeResources(8, 80, 800, 0.8));
+ tester.addHosts(1, new NodeResources(7, 70, 700, 0.7));
+ tester.addHosts(1, new NodeResources(6, 60, 600, 0.6));
+ tester.addHosts(1, new NodeResources(5, 50, 500, 0.5));
+ tester.addHosts(1, new NodeResources(4, 40, 400, 0.4));
+
+ tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1.0), 0);
+ tester.addNodes(1, 1, new NodeResources( 9, 90, 900, 0.9), 1);
+ tester.addNodes(2, 1, new NodeResources( 8, 80, 800, 0.8), 2);
+ tester.addNodes(3, 1, new NodeResources( 7, 70, 700, 0.7), 3);
+ tester.addNodes(4, 1, new NodeResources( 6, 60, 600, 0.6), 4);
+ tester.addNodes(5, 1, new NodeResources( 5, 50, 500, 0.5), 5);
+ tester.addNodes(6, 1, new NodeResources( 4, 40, 400, 0.4), 6);
+
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(0, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ /** Microbenchmark */
+ @Test
+ @Ignore
+ public void testLargeNodeRepo() {
+ // Completely fill 200 hosts with 2000 nodes
+ int hosts = 200;
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(hosts, new NodeResources(100, 1000, 10000, 10));
+ int hostOffset = 0;
+ for (int i = 0; i < 200; i++) {
+ int applicationSize = 10;
+ int resourceSize = 10;
+ tester.addNodes(i, applicationSize, new NodeResources(resourceSize, resourceSize * 10, resourceSize * 100, 0.1), hostOffset);
+ hostOffset = (hostOffset + applicationSize) % hosts;
+ }
+ long startTime = System.currentTimeMillis();
+ tester.maintainer.maintain();
+ long totalTime = System.currentTimeMillis() - startTime;
+ System.out.println("Complete in " + ( totalTime / 1000) + " seconds");
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(0, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ private static class SpareCapacityMaintainerTester {
+
+ NodeRepository nodeRepository;
+ MockDeployer deployer;
+ TestMetric metric = new TestMetric();
+ SpareCapacityMaintainer maintainer;
+ private int hostIndex = 0;
+ private int nodeIndex = 0;
+
+ private SpareCapacityMaintainerTester() {
+ this(1000);
+ }
+
+ private SpareCapacityMaintainerTester(int maxIterations) {
+ NodeFlavors flavors = new NodeFlavors(new FlavorConfigBuilder().build());
+ nodeRepository = new NodeRepository(flavors,
+ new EmptyProvisionServiceProvider().getHostResourcesCalculator(),
+ new MockCurator(),
+ new ManualClock(),
+ new Zone(Environment.prod, RegionName.from("us-east-3")),
+ new MockNameResolver().mockAnyLookup(),
+ DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"), true, false);
+ deployer = new MockDeployer(nodeRepository);
+ maintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, Duration.ofDays(1), maxIterations);
+ }
+
+ private void addHosts(int count, NodeResources resources) {
+ List<Node> hosts = new ArrayList<>();
+ for (int i = 0; i < count; i++) {
+ Node host = nodeRepository.createNode("host" + hostIndex,
+ "host" + hostIndex + ".yahoo.com",
+ ipConfig(hostIndex + nodeIndex, true),
+ Optional.empty(),
+ new Flavor(resources),
+ Optional.empty(),
+ NodeType.host);
+ hosts.add(host);
+ hostIndex++;
+ }
+ hosts = nodeRepository.addNodes(hosts, Agent.system);
+ hosts = nodeRepository.setReady(hosts, Agent.system, "Test");
+ var transaction = new NestedTransaction();
+ nodeRepository.activate(hosts, transaction);
+ transaction.commit();
+ }
+
+ private void addNodes(int id, int count, NodeResources resources, int hostOffset) {
+ List<Node> nodes = new ArrayList<>();
+ ApplicationId application = ApplicationId.from("tenant" + id, "application" + id, "default");
+ for (int i = 0; i < count; i++) {
+ ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
+ .group(ClusterSpec.Group.from(0))
+ .vespaVersion("7")
+ .build(),
+ i);
+ Node node = nodeRepository.createNode("node" + nodeIndex,
+ "node" + nodeIndex + ".yahoo.com",
+ ipConfig(hostIndex + nodeIndex, false),
+ Optional.of("host" + ( hostOffset + i) + ".yahoo.com"),
+ new Flavor(resources),
+ Optional.empty(),
+ NodeType.tenant);
+ node = node.allocate(application, membership, node.resources(), Instant.now());
+ nodes.add(node);
+ nodeIndex++;
+ }
+ nodes = nodeRepository.addNodes(nodes, Agent.system);
+ for (int i = 0; i < count; i++) {
+ Node node = nodes.get(i);
+ ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
+ .group(ClusterSpec.Group.from(0))
+ .vespaVersion("7")
+ .build(),
+ i);
+ node = node.allocate(application, membership, node.resources(), Instant.now());
+ nodes.set(i, node);
+ }
+ nodes = nodeRepository.reserve(nodes);
+ var transaction = new NestedTransaction();
+ nodes = nodeRepository.activate(nodes, transaction);
+ transaction.commit();
+ }
+
+ private IP.Config ipConfig(int id, boolean host) {
+ return new IP.Config(Set.of(String.format("%04X::%04X", id, 0)),
+ host ? IntStream.range(0, 10)
+ .mapToObj(n -> String.format("%04X::%04X", id, n))
+ .collect(Collectors.toSet())
+ : Set.of());
+ }
+
+ private void dumpState() {
+ for (Node host : nodeRepository.list().hosts().asList()) {
+ System.out.println("Host " + host.hostname() + " " + host.resources());
+ for (Node node : nodeRepository.list().childrenOf(host).asList())
+ System.out.println(" Node " + node.hostname() + " " + node.resources() + " allocation " +node.allocation());
+ }
+ }
+
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AllocationVisualizer.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AllocationVisualizer.java
index ea4386f2fd5..644b2338a5a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AllocationVisualizer.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AllocationVisualizer.java
@@ -102,13 +102,13 @@ public class AllocationVisualizer extends JPanel {
if (isHost) {
g.setColor(Color.GRAY);
- for (int i = 0; i < node.flavor().resources().memoryGb(); i++) {
+ for (int i = 0; i < node.resources().memoryGb(); i++) {
g.fillRect(x, y - nodeHeight, nodeWidth, nodeHeight);
y = y - (nodeHeight + 2);
}
} else {
g.setColor(Color.YELLOW);
- int multi = (int) node.flavor().resources().memoryGb();
+ int multi = (int) node.resources().memoryGb();
int height = multi * nodeHeight + ((multi - 1) * 2);
g.fillRect(x, y - height, nodeWidth, height);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
index 3ffb0dc34f0..0c5a682c3c5 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
@@ -60,7 +60,7 @@ public class DockerProvisioningTest {
NodeList nodes = tester.getNodes(application1, Node.State.active);
assertEquals(nodeCount, nodes.size());
- assertEquals(dockerResources, nodes.asList().get(0).flavor().resources());
+ assertEquals(dockerResources, nodes.asList().get(0).resources());
// Upgrade Vespa version on nodes
Version upgradedWantedVespaVersion = Version.fromString("6.40");
@@ -70,7 +70,7 @@ public class DockerProvisioningTest {
tester.activate(application1, new HashSet<>(upgradedHosts));
NodeList upgradedNodes = tester.getNodes(application1, Node.State.active);
assertEquals(nodeCount, upgradedNodes.size());
- assertEquals(dockerResources, upgradedNodes.asList().get(0).flavor().resources());
+ assertEquals(dockerResources, upgradedNodes.asList().get(0).resources());
assertEquals(hosts, upgradedHosts);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
index 7350df40718..98ec01e8e95 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
@@ -425,7 +425,7 @@ public class DynamicDockerAllocationTest {
);
ClusterMembership clusterMembership1 = ClusterMembership.from(
clusterSpec.with(Optional.of(ClusterSpec.Group.from(0))), index); // Need to add group here so that group is serialized in node allocation
- Node node1aAllocation = node1a.allocate(id, clusterMembership1, node1a.flavor().resources(), Instant.now());
+ Node node1aAllocation = node1a.allocate(id, clusterMembership1, node1a.resources(), Instant.now());
tester.nodeRepository().addNodes(Collections.singletonList(node1aAllocation), Agent.system);
NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(tester.getCurator()));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java
index aef25daa659..da78aff493e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java
@@ -27,10 +27,10 @@ import static org.mockito.Mockito.mock;
/**
* @author smorgrav
*/
-public class DockerHostCapacityTest {
+public class HostCapacityTest {
private final HostResourcesCalculator hostResourcesCalculator = mock(HostResourcesCalculator.class);
- private DockerHostCapacity capacity;
+ private HostCapacity capacity;
private List<Node> nodes;
private Node host1, host2, host3;
private final NodeResources resources1 = new NodeResources(1, 30, 20, 1.5);
@@ -61,7 +61,7 @@ public class DockerHostCapacityTest {
// init docker host capacity
nodes = new ArrayList<>(List.of(host1, host2, host3, nodeA, nodeB, nodeC, nodeD, nodeE));
- capacity = new DockerHostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
+ capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
}
@Test
@@ -76,7 +76,7 @@ public class DockerHostCapacityTest {
// Add a new node to host1 to deplete the memory resource
Node nodeF = Node.createDockerNode(Set.of("::6"), "nodeF", "host1", resources1, NodeType.tenant);
nodes.add(nodeF);
- capacity = new DockerHostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
+ capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
assertFalse(capacity.hasCapacity(host1, resources1));
assertFalse(capacity.hasCapacity(host1, resources2));
}
@@ -116,12 +116,12 @@ public class DockerHostCapacityTest {
var cfg = Node.createDockerNode(Set.of("::2"), "cfg", "devhost", resources1, NodeType.config);
var nodes = new ArrayList<>(List.of(cfg));
- var capacity = new DockerHostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
+ var capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
assertTrue(capacity.hasCapacity(devHost, resources1));
var container1 = Node.createDockerNode(Set.of("::3"), "container1", "devhost", resources1, NodeType.tenant);
nodes = new ArrayList<>(List.of(cfg, container1));
- capacity = new DockerHostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
+ capacity = new HostCapacity(new LockedNodeList(nodes, () -> {}), hostResourcesCalculator);
assertFalse(capacity.hasCapacity(devHost, resources1));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
index b2ee298c19d..71c3ec37d65 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
@@ -150,9 +150,9 @@ public class InPlaceResizeProvisionTest {
assertEquals(6, appNodes.size()); // 4 nodes with large resources + 2 retired nodes with medium resources
appNodes.forEach(node -> {
if (node.allocation().get().membership().retired())
- assertEquals(new NodeResources(4, 8, 160, 1, fast, local), node.flavor().resources());
+ assertEquals(new NodeResources(4, 8, 160, 1, fast, local), node.resources());
else
- assertEquals(new NodeResources(8, 16, 320, 1, fast, local), node.flavor().resources());
+ assertEquals(new NodeResources(8, 16, 320, 1, fast, local), node.resources());
initialHostnames.remove(node.hostname());
});
assertTrue("All initial nodes should still be allocated to the application", initialHostnames.isEmpty());
@@ -254,7 +254,7 @@ public class InPlaceResizeProvisionTest {
private void assertSizeAndResources(NodeList nodes, int size, NodeResources resources) {
assertEquals(size, nodes.size());
- nodes.forEach(n -> assertEquals(resources, n.flavor().resources()));
+ nodes.forEach(n -> assertEquals(resources, n.resources()));
}
private NodeList listCluster(ClusterSpec cluster) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
index 48bd091011e..e45ea09d372 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
@@ -140,7 +140,7 @@ public class InfraDeployerImplTest {
Optional<Node> nodeWithAllocation = wantedVespaVersion.map(version -> {
ClusterSpec clusterSpec = application.getClusterSpecWithVersion(version).with(Optional.of(ClusterSpec.Group.from(0)));
ClusterMembership membership = ClusterMembership.from(clusterSpec, 1);
- Allocation allocation = new Allocation(application.getApplicationId(), membership, node.flavor().resources(), Generation.initial(), false);
+ Allocation allocation = new Allocation(application.getApplicationId(), membership, node.resources(), Generation.initial(), false);
return node.with(allocation);
});
return nodeRepository.database().writeTo(state, nodeWithAllocation.orElse(node), Agent.system, Optional.empty());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index 2427c0303c6..e73aeb05ce3 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -262,8 +262,8 @@ public class ProvisioningTester {
nodeList.stream().map(n -> n.allocation().get().membership().cluster().group().get()).distinct().count());
for (Node node : nodeList) {
var expected = new NodeResources(vcpu, memory, disk, bandwidth, diskSpeed, storageType);
- assertTrue(explanation + ": Resources: Expected " + expected + " but was " + node.flavor().resources(),
- expected.compatibleWith(node.flavor().resources()));
+ assertTrue(explanation + ": Resources: Expected " + expected + " but was " + node.resources(),
+ expected.compatibleWith(node.resources()));
}
}
@@ -658,7 +658,7 @@ public class ProvisioningTester {
@Override
public NodeResources realResourcesOf(Node node, NodeRepository nodeRepository) {
- NodeResources resources = node.flavor().resources();
+ NodeResources resources = node.resources();
if (node.type() == NodeType.host) return resources;
return resources.withMemoryGb(resources.memoryGb() - memoryTaxGb)
.withDiskGb(resources.diskGb() - ( resources.storageType() == local ? localDiskTax : 0));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
index e041a7b8b54..6bb30d90218 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
@@ -4,9 +4,6 @@
"name": "AutoscalingMaintainer"
},
{
- "name": "CapacityReportMaintainer"
- },
- {
"name": "DirtyExpirer"
},
{
@@ -56,6 +53,9 @@
},
{
"name":"ScalingSuggestionsMaintainer"
+ },
+ {
+ "name": "SpareCapacityMaintainer"
}
],
"inactive": [