summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-06-17 10:27:06 +0200
committerJon Bratseth <bratseth@gmail.com>2020-06-17 10:27:06 +0200
commit57cbdcc67d9b0f35f05325d6d72d4a0cc8187a4b (patch)
tree72556b057aaa9886283722d026762e7c3f5ecbb6 /node-repository
parent7f7b6777514bf05916e2edcbc3e27b1bfd28906c (diff)
Test SpareCapacityMaintainer
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java37
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java24
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java25
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java20
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java65
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java214
11 files changed, 368 insertions, 68 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 1cc2abef734..2a6db1ffe6e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -32,7 +32,7 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which are retired */
public NodeList retired() {
- return matching(node -> node.allocation().get().membership().retired());
+ return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().retired());
}
/** Returns the subset of nodes that are being deprovisioned */
@@ -42,7 +42,7 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which are removable */
public NodeList removable() {
- return matching(node -> node.allocation().get().isRemovable());
+ return matching(node -> node.allocation().isPresent() && node.allocation().get().isRemovable());
}
/** Returns the subset of nodes having exactly the given resources */
@@ -146,6 +146,16 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
return matching(node -> nodeStates.contains(node.state()));
}
+ /** Returns the subset of nodes in the active state */
+ public NodeList active() {
+ return matching(node -> node.state()== Node.State.active);
+ }
+
+ /** Returns the subset of nodes which wantToRetire set true */
+ public NodeList wantToRetire() {
+ return matching((node -> node.status().wantToRetire()));
+ }
+
/** Returns the parent nodes of the given child nodes */
public NodeList parentsOf(Collection<Node> children) {
return children.stream()
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
index 97810c0b329..1c078dbd0c4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
@@ -206,20 +206,24 @@ public class CapacityChecker {
var containedAllocations = collateAllocations(nodechildren);
var resourceMap = new HashMap<>(availableResources);
List<Node> validAllocationTargets = allHosts.stream()
- .filter(h -> !hostsToRemove.contains(h))
- .collect(Collectors.toList());
- if (validAllocationTargets.size() == 0) {
+ .filter(h -> !hostsToRemove.contains(h))
+ .collect(Collectors.toList());
+ if (validAllocationTargets.size() == 0)
return Optional.of(HostRemovalFailure.none());
- }
allocationHistory = new AllocationHistory();
for (var host : hostsToRemove) {
Optional<Node> unallocatedNode = tryAllocateNodes(nodechildren.get(host),
- validAllocationTargets, resourceMap, containedAllocations, true);
+ validAllocationTargets,
+ resourceMap,
+ containedAllocations,
+ true);
if (unallocatedNode.isPresent()) {
AllocationFailureReasonList failures = collateAllocationFailures(unallocatedNode.get(),
- validAllocationTargets, resourceMap, containedAllocations);
+ validAllocationTargets,
+ resourceMap,
+ containedAllocations);
return Optional.of(HostRemovalFailure.create(host, unallocatedNode.get(), failures));
}
}
@@ -332,6 +336,11 @@ public class CapacityChecker {
public List<Node> hostsCausingFailure;
public HostRemovalFailure failureReason;
+ @Override
+ public String toString() {
+ return "failure path: " + failureReason + " upon removing " + hostsCausingFailure;
+ }
+
}
/**
@@ -346,17 +355,15 @@ public class CapacityChecker {
public AllocationFailureReasonList allocationFailures;
public static HostRemovalFailure none() {
- return new HostRemovalFailure(
- Optional.empty(),
- Optional.empty(),
- new AllocationFailureReasonList(List.of()));
+ return new HostRemovalFailure(Optional.empty(),
+ Optional.empty(),
+ new AllocationFailureReasonList(List.of()));
}
public static HostRemovalFailure create(Node host, Node tenant, AllocationFailureReasonList failureReasons) {
- return new HostRemovalFailure(
- Optional.of(host),
- Optional.of(tenant),
- failureReasons);
+ return new HostRemovalFailure(Optional.of(host),
+ Optional.of(tenant),
+ failureReasons);
}
private HostRemovalFailure(Optional<Node> host, Optional<Node> tenant, AllocationFailureReasonList allocationFailures) {
@@ -367,7 +374,7 @@ public class CapacityChecker {
@Override
public String toString() {
- if (host.isEmpty() || tenant.isEmpty()) return "No removal candidates exists.";
+ if (host.isEmpty() || tenant.isEmpty()) return "No removal candidates exists";
return String.format(
"Failure to remove host %s" +
"\n\tNo new host found for tenant %s:" +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index db331e88d64..2b4775cbeb1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -147,9 +147,12 @@ class MaintenanceDeployment implements Closeable {
/**
* Try to deploy to make this move.
*
+ * @param verifyTarget true to only make this move if the node ends up at the expected target host,
+ * false if we should perform it as long as it moves from the source host
* @return true if the move was done, false if it couldn't be
*/
- public boolean execute(Agent agent, Deployer deployer, Metric metric, NodeRepository nodeRepository) {
+ public boolean execute(boolean verifyTarget,
+ Agent agent, Deployer deployer, Metric metric, NodeRepository nodeRepository) {
if (isEmpty()) return false;
ApplicationId application = node.allocation().get().owner();
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) {
@@ -161,16 +164,19 @@ class MaintenanceDeployment implements Closeable {
Optional<Node> expectedNewNode = Optional.empty();
try {
if ( ! deployment.prepare()) return false;
- expectedNewNode =
- nodeRepository.getNodes(application, Node.State.reserved).stream()
- .filter(n -> !n.hostname().equals(node.hostname()))
- .filter(n -> n.allocation().get().membership().cluster().id().equals(node.allocation().get().membership().cluster().id()))
- .findAny();
- if (expectedNewNode.isEmpty()) return false;
- if ( ! expectedNewNode.get().hasParent(toHost.hostname())) return false;
+ if (verifyTarget) {
+ expectedNewNode =
+ nodeRepository.getNodes(application, Node.State.reserved).stream()
+ .filter(n -> !n.hostname().equals(node.hostname()))
+ .filter(n -> n.allocation().get().membership().cluster().id().equals(node.allocation().get().membership().cluster().id()))
+ .findAny();
+ if (expectedNewNode.isEmpty()) return false;
+ if (!expectedNewNode.get().hasParent(toHost.hostname())) return false;
+ }
if ( ! deployment.activate()) return false;
- log.info(agent + " redeployed " + application + " to " + this);
+ log.info(agent + " redeployed " + application + " to " +
+ ( verifyTarget ? this : "move " + (node.hostname() + " from " + fromHost)));
return true;
}
finally {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index c3f8afae4a4..7875afeb28c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -168,25 +168,24 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final NodeFailer.ThrottlePolicy throttlePolicy;
DefaultTimes(Zone zone) {
- failGrace = Duration.ofMinutes(30);
- periodicRedeployInterval = Duration.ofMinutes(30);
- // Don't redeploy in test environments
- redeployMaintainerInterval = Duration.ofMinutes(1);
- operatorChangeRedeployInterval = Duration.ofMinutes(1);
+ autoscalingInterval = Duration.ofMinutes(5);
+ dynamicProvisionerInterval = Duration.ofMinutes(5);
failedExpirerInterval = Duration.ofMinutes(10);
- provisionedExpiry = Duration.ofHours(4);
- spareCapacityMaintenanceInterval = Duration.ofMinutes(10);
- metricsInterval = Duration.ofMinutes(1);
+ failGrace = Duration.ofMinutes(30);
infrastructureProvisionInterval = Duration.ofMinutes(1);
- throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
loadBalancerExpirerInterval = Duration.ofMinutes(5);
- reservationExpiry = Duration.ofMinutes(15); // Need to be long enough for deployment to be finished for all config model versions
- dynamicProvisionerInterval = Duration.ofMinutes(5);
+ metricsInterval = Duration.ofMinutes(1);
+ nodeMetricsCollectionInterval = Duration.ofMinutes(1);
+ operatorChangeRedeployInterval = Duration.ofMinutes(1);
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
+ periodicRedeployInterval = Duration.ofMinutes(30);
+ provisionedExpiry = Duration.ofHours(4);
rebalancerInterval = Duration.ofMinutes(40);
- nodeMetricsCollectionInterval = Duration.ofMinutes(1);
- autoscalingInterval = Duration.ofMinutes(5);
+ redeployMaintainerInterval = Duration.ofMinutes(1);
+ reservationExpiry = Duration.ofMinutes(15); // Need to be long enough for deployment to be finished for all config model versions
scalingSuggestionsInterval = Duration.ofMinutes(31);
+ spareCapacityMaintenanceInterval = Duration.ofMinutes(10);
+ throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index e10ac68fde0..3df20fa9d08 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -46,7 +46,7 @@ public class Rebalancer extends NodeRepositoryMaintainer {
NodeList allNodes = nodeRepository().list();
updateSkewMetric(allNodes);
if ( ! zoneIsStable(allNodes)) return;
- findBestMove(allNodes).execute(Agent.Rebalancer, deployer, metric, nodeRepository());
+ findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
}
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 4179d6d3f83..a396bbda5c9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -68,11 +69,9 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
if (spareHostCapacity == 0) {
Move move = findMitigatingMove(failurePath.get());
- boolean success = move.execute(Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
- if (success) {
- // This may strictly be a (noble) lie: We succeeded in taking one step to mitigate,
- // but not necessarily *all* steps needed to justify adding 1 to spare capacity.
- // However, we alert on this being 0 and we don't want to do that as long as we're not stuck.
+ if (moving(move)) {
+ // We succeeded or are in the process of taking a step to mitigate.
+ // Report with the assumption this will eventually succeed to avoid alerting before we're stuck
spareHostCapacity++;
}
}
@@ -80,6 +79,12 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
}
+ private boolean moving(Move move) {
+ if (move.isEmpty()) return false;
+ if (move.node().allocation().get().membership().retired()) return true; // Move already in progress
+ return move.execute(false, Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
+ }
+
private Move findMitigatingMove(CapacityChecker.HostFailurePath failurePath) {
Optional<Node> nodeWhichCantMove = failurePath.failureReason.tenant;
if (nodeWhichCantMove.isEmpty()) return Move.empty();
@@ -237,9 +242,10 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
@Override
public List<Node> next() {
- next = null;
if ( ! hasNext()) throw new IllegalStateException("No more elements");
- return next;
+ var current = next;
+ next = null;
+ return current;
}
private boolean hasOneAtPosition(int position, int number) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 15be7796187..37842115949 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -386,15 +386,16 @@ public class NodeSerializer {
case "operator" : return Agent.operator;
case "application" : return Agent.application;
case "system" : return Agent.system;
- case "NodeFailer" : return Agent.NodeFailer;
- case "Rebalancer" : return Agent.Rebalancer;
case "DirtyExpirer" : return Agent.DirtyExpirer;
+ case "DynamicProvisioningMaintainer" : return Agent.DynamicProvisioningMaintainer;
case "FailedExpirer" : return Agent.FailedExpirer;
case "InactiveExpirer" : return Agent.InactiveExpirer;
+ case "NodeFailer" : return Agent.NodeFailer;
case "ProvisionedExpirer" : return Agent.ProvisionedExpirer;
+ case "Rebalancer" : return Agent.Rebalancer;
case "ReservationExpirer" : return Agent.ReservationExpirer;
- case "DynamicProvisioningMaintainer" : return Agent.DynamicProvisioningMaintainer;
case "RetiringUpgrader" : return Agent.RetiringUpgrader;
+ case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer;
}
throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'");
}
@@ -403,15 +404,16 @@ public class NodeSerializer {
case operator : return "operator";
case application : return "application";
case system : return "system";
- case NodeFailer : return "NodeFailer";
- case Rebalancer : return "Rebalancer";
case DirtyExpirer : return "DirtyExpirer";
+ case DynamicProvisioningMaintainer : return "DynamicProvisioningMaintainer";
case FailedExpirer : return "FailedExpirer";
case InactiveExpirer : return "InactiveExpirer";
+ case NodeFailer : return "NodeFailer";
case ProvisionedExpirer : return "ProvisionedExpirer";
+ case Rebalancer : return "Rebalancer";
case ReservationExpirer : return "ReservationExpirer";
- case DynamicProvisioningMaintainer : return "DynamicProvisioningMaintainer";
case RetiringUpgrader: return "RetiringUpgrader";
+ case SpareCapacityMaintainer: return "SpareCapacityMaintainer";
}
throw new IllegalArgumentException("Serialized form of '" + agent + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index 5ec5c2c08e8..95f98a351a1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -10,6 +10,9 @@ import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.HostFilter;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
import java.time.Clock;
@@ -27,18 +30,22 @@ import java.util.stream.Collectors;
*/
public class MockDeployer implements Deployer {
+ // For actual deploy mode
private final NodeRepositoryProvisioner provisioner;
private final Map<ApplicationId, ApplicationContext> applications;
- private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>();
+ // For mock deploy anything, changing wantToRetire to retired only
+ private final NodeRepository nodeRepository;
/** The number of redeployments done to this */
public int redeployments = 0;
+ private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>();
private final Clock clock;
private final ReentrantLock lock = new ReentrantLock();
private boolean failActivate = false;
+ /** Create a mock deployer which returns empty on every deploy request. */
@Inject
@SuppressWarnings("unused")
public MockDeployer() {
@@ -46,15 +53,30 @@ public class MockDeployer implements Deployer {
}
/**
- * Create a mock deployer which contains a substitute for an application repository, fullfilled to
+ * Create a mock deployer which returns a deployment on every request,
+ * and fulfills it by not actually deploying but only changing any wantToRetire nodes
+ * for the application to retired.
+ */
+ public MockDeployer(NodeRepository nodeRepository) {
+ this.provisioner = null;
+ this.applications = Map.of();
+ this.nodeRepository = nodeRepository;
+
+ this.clock = nodeRepository.clock();
+ }
+
+ /**
+ * Create a mock deployer which contains a substitute for an application repository, filled to
* be able to call provision with the right parameters.
*/
public MockDeployer(NodeRepositoryProvisioner provisioner,
Clock clock,
Map<ApplicationId, ApplicationContext> applications) {
this.provisioner = provisioner;
- this.clock = clock;
this.applications = new HashMap<>(applications);
+ this.nodeRepository = null;
+
+ this.clock = clock;
}
public ReentrantLock lock() { return lock; }
@@ -74,9 +96,13 @@ public class MockDeployer implements Deployer {
throw new RuntimeException(e);
}
try {
- return Optional.ofNullable(applications.get(id))
- .map(application -> new MockDeployment(provisioner, application));
- } finally {
+ if (provisioner != null)
+ return Optional.ofNullable(applications.get(id))
+ .map(application -> new MockDeployment(provisioner, application));
+ else
+ return Optional.of(new RetiringOnlyMockDeployment(nodeRepository, id));
+ }
+ finally {
lock.unlock();
}
}
@@ -135,6 +161,33 @@ public class MockDeployer implements Deployer {
}
+ public class RetiringOnlyMockDeployment implements Deployment {
+
+ private final NodeRepository nodeRepository;
+ private final ApplicationId applicationId;
+
+ private RetiringOnlyMockDeployment(NodeRepository nodeRepository, ApplicationId applicationId) {
+ this.nodeRepository = nodeRepository;
+ this.applicationId = applicationId;
+ }
+
+ @Override
+ public void prepare() { }
+
+ @Override
+ public void activate() {
+ redeployments++;
+ lastDeployTimes.put(applicationId, clock.instant());
+
+ for (Node node : nodeRepository.list().owner(applicationId).active().wantToRetire().asList())
+ nodeRepository.write(node.retire(nodeRepository.clock().instant()), nodeRepository.lock(node));
+ }
+
+ @Override
+ public void restart(HostFilter filter) {}
+
+ }
+
/** An application context which substitutes for an application repository */
public static class ApplicationContext {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
index 5813585554d..5e72cfc53ac 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
@@ -29,6 +29,7 @@ public class CapacityCheckerTest {
var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
assertTrue(failurePath.isPresent());
assertTrue(tester.nodeRepository.getNodes(NodeType.host).containsAll(failurePath.get().hostsCausingFailure));
+ assertEquals(5, failurePath.get().hostsCausingFailure.size());
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
index 464bdd6405a..62e9a227109 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
@@ -45,6 +45,7 @@ import java.util.stream.IntStream;
* @author mgimle
*/
public class CapacityCheckerTester {
+
public static final Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
// Components with state
@@ -138,8 +139,7 @@ public class CapacityCheckerTester {
.mapToObj(n -> String.format("%04X::%04X", hostindex, n))
.collect(Collectors.toSet());
- NodeResources nr = containingNodeResources(childResources,
- excessCapacity);
+ NodeResources nr = containingNodeResources(childResources, excessCapacity);
Node node = nodeRepository.createNode(hostname, hostname,
new IP.Config(Set.of("::"), availableIps), Optional.empty(),
new Flavor(nr), Optional.empty(), NodeType.host);
@@ -159,7 +159,8 @@ public class CapacityCheckerTester {
Set<String> availableIps = IntStream.range(0, ips)
.mapToObj(n -> String.format("%04X::%04X", hostid, n))
.collect(Collectors.toSet());
- Node node = nodeRepository.createNode(hostname, hostname,
+ Node node = nodeRepository.createNode(hostname,
+ hostname,
new IP.Config(Set.of("::"), availableIps), Optional.empty(),
new Flavor(capacity), Optional.empty(), NodeType.host);
hosts.add(node);
@@ -175,8 +176,8 @@ public class CapacityCheckerTester {
);
createNodes(childrenPerHost, numDistinctChildren, childResources,
- numHosts, hostExcessCapacity, hostExcessIps,
- numEmptyHosts, emptyHostExcessCapacity, emptyHostExcessIps);
+ numHosts, hostExcessCapacity, hostExcessIps,
+ numEmptyHosts, emptyHostExcessCapacity, emptyHostExcessIps);
}
void createNodes(int childrenPerHost, int numDistinctChildren, List<NodeResources> childResources,
int numHosts, NodeResources hostExcessCapacity, int hostExcessIps,
@@ -264,10 +265,11 @@ public class CapacityCheckerTester {
owner = ApplicationId.from(nodeModel.owner.tenant, nodeModel.owner.application, nodeModel.owner.instance);
}
- NodeResources.DiskSpeed diskSpeed;
- NodeResources nr = new NodeResources(nodeModel.minCpuCores, nodeModel.minMainMemoryAvailableGb,
- nodeModel.minDiskAvailableGb, nodeModel.bandwidth * 1000,
- nodeModel.fastDisk ? NodeResources.DiskSpeed.fast : NodeResources.DiskSpeed.slow);
+ NodeResources nr = new NodeResources(nodeModel.minCpuCores,
+ nodeModel.minMainMemoryAvailableGb,
+ nodeModel.minDiskAvailableGb,
+ nodeModel.bandwidth * 1000,
+ nodeModel.fastDisk ? NodeResources.DiskSpeed.fast : NodeResources.DiskSpeed.slow);
Flavor f = new Flavor(nr);
Node node = nodeRepository.createNode(nodeModel.id, nodeModel.hostname,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
new file mode 100644
index 00000000000..7d7b3910cdb
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
@@ -0,0 +1,214 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterMembership;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeFlavors;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.IP;
+import com.yahoo.vespa.hosted.provision.provisioning.EmptyProvisionServiceProvider;
+import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author bratseth
+ */
+public class SpareCapacityMaintainerTest {
+
+ @Test
+ public void testEmpty() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ }
+
+ @Test
+ public void testOneSpare() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testTwoSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(3, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(2, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testNoSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(2, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1), 0);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(0, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testAllWorksAsSpares() {
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(4, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(5, 50, 500, 0.5), 0);
+ tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2);
+ tester.maintainer.maintain();
+ assertEquals(0, tester.deployer.redeployments);
+ assertEquals(0, tester.nodeRepository.list().retired().size());
+ assertEquals(2, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ @Test
+ public void testMoveIsNeededToHaveSpares() {
+ // Moving application id 1 and 2 to the same nodes frees up spares for application 0
+ var tester = new SpareCapacityMaintainerTester();
+ tester.addHosts(6, new NodeResources(10, 100, 1000, 1));
+ tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1), 0);
+ tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2);
+ tester.addNodes(2, 2, new NodeResources(5, 50, 500, 0.5), 4);
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+
+ // Maintaining again is a no-op since the node to move is already retired
+ tester.maintainer.maintain();
+ assertEquals(1, tester.deployer.redeployments);
+ assertEquals(1, tester.nodeRepository.list().retired().size());
+ assertEquals(1, tester.metric.values.get("spareHostCapacity"));
+ }
+
+ private static class SpareCapacityMaintainerTester {
+
+ NodeRepository nodeRepository;
+ MockDeployer deployer;
+ TestMetric metric = new TestMetric();
+ SpareCapacityMaintainer maintainer;
+ private int hostIndex = 0;
+ private int nodeIndex = 0;
+
+ private SpareCapacityMaintainerTester() {
+ NodeFlavors flavors = new NodeFlavors(new FlavorConfigBuilder().build());
+ nodeRepository = new NodeRepository(flavors,
+ new EmptyProvisionServiceProvider().getHostResourcesCalculator(),
+ new MockCurator(),
+ new ManualClock(),
+ new Zone(Environment.prod, RegionName.from("us-east-3")),
+ new MockNameResolver().mockAnyLookup(),
+ DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"), true, false);
+ deployer = new MockDeployer(nodeRepository);
+ maintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, Duration.ofMinutes(1));
+ }
+
+ private void addHosts(int count, NodeResources resources) {
+ List<Node> hosts = new ArrayList<>();
+ for (int i = 0; i < count; i++) {
+ Node host = nodeRepository.createNode("host" + hostIndex,
+ "host" + hostIndex + ".yahoo.com",
+ ipConfig(hostIndex + nodeIndex, true),
+ Optional.empty(),
+ new Flavor(resources),
+ Optional.empty(),
+ NodeType.host);
+ hosts.add(host);
+ hostIndex++;
+ }
+ hosts = nodeRepository.addNodes(hosts, Agent.system);
+ hosts = nodeRepository.setReady(hosts, Agent.system, "Test");
+ var transaction = new NestedTransaction();
+ nodeRepository.activate(hosts, transaction);
+ transaction.commit();
+ }
+
+ private void addNodes(int id, int count, NodeResources resources, int hostOffset) {
+ List<Node> nodes = new ArrayList<>();
+ ApplicationId application = ApplicationId.from("tenant" + id, "application" + id, "default");
+ for (int i = 0; i < count; i++) {
+ ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
+ .group(ClusterSpec.Group.from(0))
+ .vespaVersion("7")
+ .build(),
+ i);
+ Node node = nodeRepository.createNode("node" + nodeIndex,
+ "node" + nodeIndex + ".yahoo.com",
+ ipConfig(hostIndex + nodeIndex, false),
+ Optional.of("host" + ( hostOffset + i) + ".yahoo.com"),
+ new Flavor(resources),
+ Optional.empty(),
+ NodeType.tenant);
+ node = node.allocate(application, membership, node.resources(), Instant.now());
+ nodes.add(node);
+ nodeIndex++;
+ }
+ nodes = nodeRepository.addNodes(nodes, Agent.system);
+ for (int i = 0; i < count; i++) {
+ Node node = nodes.get(i);
+ ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
+ .group(ClusterSpec.Group.from(0))
+ .vespaVersion("7")
+ .build(),
+ i);
+ node = node.allocate(application, membership, node.resources(), Instant.now());
+ nodes.set(i, node);
+ }
+ nodes = nodeRepository.reserve(nodes);
+ var transaction = new NestedTransaction();
+ nodes = nodeRepository.activate(nodes, transaction);
+ transaction.commit();
+ }
+
+ private IP.Config ipConfig(int id, boolean host) {
+ return new IP.Config(Set.of(String.format("%04X::%04X", id, 0)),
+ host ? IntStream.range(0, 10)
+ .mapToObj(n -> String.format("%04X::%04X", id, n))
+ .collect(Collectors.toSet())
+ : Set.of());
+ }
+
+ private void dumpState() {
+ for (Node host : nodeRepository.list().hosts().asList()) {
+ System.out.println("Host " + host.hostname() + " " + host.resources());
+ for (Node node : nodeRepository.list().childrenOf(host).asList())
+ System.out.println(" Node " + node.hostname() + " " + node.resources() + " allocation " +node.allocation());
+ }
+ }
+
+ }
+
+}