author    Valerij Fredriksen <valerijf@yahooinc.com>  2022-11-02 21:33:27 +0100
committer Valerij Fredriksen <valerijf@yahooinc.com>  2022-11-02 21:33:27 +0100
commit    d939ca00c4c4acc07374c7d862f892fe702cc328 (patch)
tree      465746d6a6b4854ad6c52a8fe56c9a9b54ddc7f5 /node-repository
parent    2a348d61213778f11c762de5f3570d8174f9f294 (diff)
Split DynamicProvisioningMaintainer into HostCapacityMaintainer and HostDeprovisioner
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java (renamed from node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java) |  80
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostDeprovisioner.java |  57
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java |   5
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java (renamed from node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java) | 182
4 files changed, 201 insertions(+), 123 deletions(-)
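
The commit separates two previously entangled responsibilities: HostCapacityMaintainer converges the zone to the wanted capacity and only marks excess hosts for removal, while the new HostDeprovisioner later sweeps hosts that are already parked or failed with wantToDeprovision and performs the slow cloud-provider calls. A minimal, self-contained sketch of this mark-then-sweep split (Host, Marker, Sweeper and deprovisionInCloud are hypothetical stand-ins, not the Vespa types):

    import java.util.List;

    // Hypothetical stand-ins for the node-repository types; names are illustrative only.
    enum State { active, parked, failed }

    final class Host {
        final String hostname;
        State state;
        boolean wantToDeprovision;
        Host(String hostname, State state) { this.hostname = hostname; this.state = state; }
    }

    // Phase 1 (HostCapacityMaintainer's role): mark excess hosts, never talk to the cloud.
    final class Marker {
        void markForRemoval(List<Host> excessHosts) {
            for (Host host : excessHosts) {
                host.wantToDeprovision = true;
                host.state = State.parked; // parked (or retired towards parked) for removal
            }
        }
    }

    // Phase 2 (HostDeprovisioner's role): sweep already-marked hosts with the slow external call.
    final class Sweeper {
        int sweep(List<Host> hosts) {
            int failures = 0;
            for (Host host : hosts) {
                boolean removable = host.wantToDeprovision
                        && (host.state == State.parked || host.state == State.failed);
                if (!removable) continue;
                try {
                    deprovisionInCloud(host);  // may be slow; safe to retry on the next run
                } catch (RuntimeException e) {
                    failures++;                // the mark stays in place, so the host is retried
                }
            }
            return failures;
        }
        private void deprovisionInCloud(Host host) { /* cloud-provider request would go here */ }
    }

    final class Demo {
        public static void main(String[] args) {
            List<Host> hosts = List.of(new Host("host2", State.failed));
            new Marker().markForRemoval(hosts);                           // phase 1
            System.out.println(new Sweeper().sweep(hosts) + " failures"); // phase 2: "0 failures"
        }
    }

Because the mark (a node-repository write) and the removal (a cloud request) no longer happen in the same pass, a failure between the two leaves the host marked, and the next HostDeprovisioner run simply picks it up again.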
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
index 245bce1b9e8..df04698e0b4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
@@ -37,6 +37,7 @@ import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Comparator;
+import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -50,19 +51,19 @@ import java.util.stream.Collectors;
* @author freva
* @author mpolden
*/
-public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
+public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
- private static final Logger log = Logger.getLogger(DynamicProvisioningMaintainer.class.getName());
+ private static final Logger log = Logger.getLogger(HostCapacityMaintainer.class.getName());
private final HostProvisioner hostProvisioner;
private final ListFlag<ClusterCapacity> preprovisionCapacityFlag;
private final JacksonFlag<SharedHost> sharedHostFlag;
- DynamicProvisioningMaintainer(NodeRepository nodeRepository,
- Duration interval,
- HostProvisioner hostProvisioner,
- FlagSource flagSource,
- Metric metric) {
+ HostCapacityMaintainer(NodeRepository nodeRepository,
+ Duration interval,
+ HostProvisioner hostProvisioner,
+ FlagSource flagSource,
+ Metric metric) {
super(nodeRepository, interval, metric);
this.hostProvisioner = hostProvisioner;
this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource);
@@ -72,42 +73,46 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
@Override
protected double maintain() {
NodeList nodes = nodeRepository().nodes().list();
- convergeToCapacity(nodes);
- return 1.0;
- }
-
- /** Converge zone to wanted capacity */
- private void convergeToCapacity(NodeList nodes) {
List<Node> excessHosts;
try {
excessHosts = provision(nodes);
} catch (NodeAllocationException | IllegalStateException e) {
log.log(Level.WARNING, "Failed to allocate preprovisioned capacity and/or find excess hosts: " + e.getMessage());
- return; // avoid removing excess hosts
+ return 0; // avoid removing excess hosts
} catch (RuntimeException e) {
log.log(Level.WARNING, "Failed to allocate preprovisioned capacity and/or find excess hosts", e);
- return; // avoid removing excess hosts
+ return 0; // avoid removing excess hosts
}
- excessHosts.forEach(host -> {
- Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10));
- if (optionalMutex.isEmpty()) return;
- try (NodeMutex mutex = optionalMutex.get()) {
- if (host.state() != mutex.node().state()) return;
- host = mutex.node();
- // First mark the host as wantToDeprovision so that if hostProvisioner fails, this host
- // * won't get new nodes allocated to it
- // * will be selected as excess on next iteration of this maintainer
- nodeRepository().nodes().deprovision(host.hostname(), Agent.DynamicProvisioningMaintainer, nodeRepository().clock().instant());
- hostProvisioner.deprovision(host);
- nodeRepository().nodes().removeRecursively(host, true);
- } catch (UncheckedTimeoutException e) {
- log.log(Level.WARNING, "Failed to deprovision " + host.hostname() +
- ": Failed to get lock on node, will retry later");
- } catch (RuntimeException e) {
- log.log(Level.WARNING, "Failed to deprovision " + host.hostname() + ", will retry in " + interval(), e);
+ markForRemoval(excessHosts);
+ return 1;
+ }
+
+ private void markForRemoval(List<Node> excessHosts) {
+ if (excessHosts.isEmpty()) return;
+
+ try (var lock = nodeRepository().nodes().lockUnallocated()) {
+ NodeList nodes = nodeRepository().nodes().list(); // Reread nodes under lock
+ for (Node host : excessHosts) {
+ Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10));
+ if (optionalMutex.isEmpty()) continue;
+ try (NodeMutex mutex = optionalMutex.get()) {
+ host = mutex.node();
+ if (!canRemoveHost(host)) continue;
+ if (!nodes.childrenOf(host).stream().allMatch(HostCapacityMaintainer::canDeprovision))
+ continue;
+
+ // Retire the host to parked if possible, otherwise move it straight to parked
+ if (EnumSet.of(Node.State.reserved, Node.State.active, Node.State.inactive).contains(host.state())) {
+ Node retiredHost = host.withWantToRetire(true, true, Agent.DynamicProvisioningMaintainer, nodeRepository().clock().instant());
+ nodeRepository().nodes().write(retiredHost, mutex);
+ } else nodeRepository().nodes().park(host.hostname(), true, Agent.DynamicProvisioningMaintainer, "Parked for removal");
+ } catch (UncheckedTimeoutException e) {
+ log.log(Level.WARNING, "Failed to mark " + host.hostname() +
+ " for deprovisioning: Failed to get lock on node, will retry later");
+ }
}
- });
+ }
}
/**
@@ -153,7 +158,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
}
}
for (var node : nodes) {
- if (node.parentHostname().isPresent() && !canRemoveNode(node)) {
+ if (node.parentHostname().isPresent() && !canDeprovision(node)) {
removableHostsByHostname.remove(node.parentHostname().get());
}
}
@@ -169,12 +174,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
};
}
- private static boolean canRemoveNode(Node node) {
- if (node.type().isHost()) throw new IllegalArgumentException("Node " + node + " is not a child");
- return node.allocation().isEmpty() || canDeprovision(node);
- }
-
- private static boolean canDeprovision(Node node) {
+ static boolean canDeprovision(Node node) {
return node.status().wantToDeprovision() && (node.state() == Node.State.parked ||
node.state() == Node.State.failed);
}
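
The markForRemoval hunk above retires hosts that may still be serving (reserved, active or inactive) so they drain towards parked, and parks every other state directly. A small sketch of that EnumSet-based dispatch, with a hypothetical NodeState enum standing in for Node.State:

    import java.util.EnumSet;

    enum NodeState { provisioned, ready, reserved, active, inactive, failed, parked }

    final class RemovalPolicy {
        // States in which a host may still hold or receive allocations: retire so it drains.
        private static final EnumSet<NodeState> RETIRABLE =
                EnumSet.of(NodeState.reserved, NodeState.active, NodeState.inactive);

        /** Retire hosts that can drain gracefully; park the rest straight away. */
        static String actionFor(NodeState state) {
            return RETIRABLE.contains(state) ? "retire" : "park";
        }

        public static void main(String[] args) {
            System.out.println(actionFor(NodeState.active));      // retire
            System.out.println(actionFor(NodeState.provisioned)); // park
        }
    }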
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostDeprovisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostDeprovisioner.java
new file mode 100644
index 00000000000..0d9df067f3f
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostDeprovisioner.java
@@ -0,0 +1,57 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
+
+import java.time.Duration;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * @author freva
+ */
+public class HostDeprovisioner extends NodeRepositoryMaintainer {
+
+ private static final Logger log = Logger.getLogger(HostDeprovisioner.class.getName());
+
+ private final HostProvisioner hostProvisioner;
+
+ HostDeprovisioner(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) {
+ super(nodeRepository, interval, metric);
+ this.hostProvisioner = hostProvisioner;
+ }
+
+ @Override
+ protected double maintain() {
+ NodeList allNodes = nodeRepository().nodes().list();
+ NodeList hosts = allNodes.parents().matching(HostCapacityMaintainer::canDeprovision);
+
+ int failures = 0;
+ for (Node host : hosts) {
+ // This shouldn't be possible since failed, parked, and wantToDeprovision should be recursive
+ if (!allNodes.childrenOf(host).stream().allMatch(HostCapacityMaintainer::canDeprovision))
+ continue;
+
+ try {
+ // Technically we should do this under application lock, but
+ // * HostProvisioner::deprovision may take some time since we are waiting for request(s) against
+ // the cloud provider
+ // * Because the application lock is shared between all hosts of the same type we want to avoid
+ // holding it over longer periods
+ // * We are about to remove these hosts anyway, so only reason we'd want to hold the lock is
+ // if we want to support aborting deprovision if operator manually intervenes
+ hostProvisioner.deprovision(host);
+ nodeRepository().nodes().removeRecursively(host, true);
+ } catch (RuntimeException e) {
+ failures++;
+ log.log(Level.WARNING, "Failed to deprovision " + host.hostname() + ", will retry in " + interval(), e);
+ }
+ }
+ return asSuccessFactor(hosts.size(), failures);
+ }
+
+}
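
Note that maintain() now reports a success factor instead of bailing out on the first failure, so one failed cloud call only lowers the metric and the host is retried on the next run. Assuming asSuccessFactor (inherited from the maintainer base class, not shown in this diff) means roughly "fraction of attempts that succeeded, with an empty run counting as full success", a sketch of it could be:

    final class SuccessFactor {
        // Assumed semantics of the base-class helper; the real definition is not in this diff.
        static double asSuccessFactor(int attempts, int failures) {
            return attempts == 0 ? 1.0 : (attempts - failures) / (double) attempts;
        }

        public static void main(String[] args) {
            System.out.println(asSuccessFactor(0, 0)); // 1.0: nothing to deprovision
            System.out.println(asSuccessFactor(4, 1)); // 0.75: one cloud call failed
        }
    }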
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index acc5dd66f7a..9436fcc150e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -70,7 +70,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
.ifPresent(maintainers::add);
provisionServiceProvider.getHostProvisioner()
.map(hostProvisioner -> List.of(
- new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric),
+ new HostCapacityMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric),
+ new HostDeprovisioner(nodeRepository, defaults.hostDeprovisionerInterval, metric, hostProvisioner),
new HostResumeProvisioner(nodeRepository, defaults.hostResumeProvisionerInterval, metric, hostProvisioner),
new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner),
new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner)))
@@ -113,6 +114,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration infrastructureProvisionInterval;
private final Duration loadBalancerExpirerInterval;
private final Duration dynamicProvisionerInterval;
+ private final Duration hostDeprovisionerInterval;
private final Duration hostResumeProvisionerInterval;
private final Duration diskReplacerInterval;
private final Duration osUpgradeActivatorInterval;
@@ -128,6 +130,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
DefaultTimes(Zone zone, Deployer deployer) {
autoscalingInterval = Duration.ofMinutes(5);
dynamicProvisionerInterval = Duration.ofMinutes(3);
+ hostDeprovisionerInterval = Duration.ofMinutes(3);
hostResumeProvisionerInterval = Duration.ofMinutes(3);
diskReplacerInterval = Duration.ofMinutes(3);
failedExpirerInterval = Duration.ofMinutes(10);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index e882de3aa62..c2f66328a67 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -31,15 +31,20 @@ import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.Generation;
import com.yahoo.vespa.hosted.provision.node.IP;
+import com.yahoo.vespa.hosted.provision.node.Status;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.MockDuperModel;
import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import com.yahoo.vespa.service.duper.ConfigServerApplication;
import com.yahoo.vespa.service.duper.ConfigServerHostApplication;
import com.yahoo.vespa.service.duper.ControllerApplication;
import com.yahoo.vespa.service.duper.ControllerHostApplication;
+import com.yahoo.vespa.service.duper.InfraApplication;
+import com.yahoo.vespa.service.duper.TenantHostApplication;
import org.junit.Test;
import java.time.Duration;
@@ -63,7 +68,7 @@ import static org.junit.Assert.fail;
* @author freva
* @author mpolden
*/
-public class DynamicProvisioningMaintainerTest {
+public class HostCapacityMaintainerTest {
@Test
public void finds_nodes_that_need_deprovisioning_without_pre_provisioning() {
@@ -71,7 +76,7 @@ public class DynamicProvisioningMaintainerTest {
assertTrue(tester.nodeRepository.nodes().node("host2").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host3").isPresent());
- tester.maintainer.maintain();
+ tester.maintain();
assertTrue(tester.nodeRepository.nodes().node("host2").isEmpty());
assertTrue(tester.nodeRepository.nodes().node("host3").isEmpty());
}
@@ -83,7 +88,7 @@ public class DynamicProvisioningMaintainerTest {
Optional<Node> failedHost = tester.nodeRepository.nodes().node("host2");
assertTrue(failedHost.isPresent());
- tester.maintainer.maintain();
+ tester.maintain();
assertTrue("Failed host is deprovisioned", tester.nodeRepository.nodes().node(failedHost.get().hostname()).isEmpty());
assertEquals(1, tester.hostProvisioner.deprovisionedHosts());
}
@@ -97,24 +102,24 @@ public class DynamicProvisioningMaintainerTest {
ClusterCapacity.class);
assertEquals(0, tester.hostProvisioner.provisionedHosts().size());
- assertEquals(11, tester.nodeRepository.nodes().list().size());
+ assertEquals(9, tester.nodeRepository.nodes().list().size());
assertTrue(tester.nodeRepository.nodes().node("host2").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host2-1").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host3").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname100").isEmpty());
- assertTrue(tester.nodeRepository.nodes().node("hostname101").isEmpty());
+ assertTrue(tester.nodeRepository.nodes().node("host100").isEmpty());
+ assertTrue(tester.nodeRepository.nodes().node("host101").isEmpty());
- tester.maintainer.maintain();
+ tester.maintain();
assertEquals(2, tester.hostProvisioner.provisionedHosts().size());
assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
NodeList nodesAfter = tester.nodeRepository.nodes().list();
- assertEquals(11, nodesAfter.size()); // 2 removed, 2 added
+ assertEquals(9, nodesAfter.size()); // 2 removed, 2 added
assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.nodes().node("host2").isEmpty());
assertTrue("Node on deprovisioned host removed", tester.nodeRepository.nodes().node("host2-1").isEmpty());
assertTrue("Host satisfying 16-24-100-1 is kept", tester.nodeRepository.nodes().node("host3").isPresent());
- assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("hostname100").isPresent());
- assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("hostname101").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("host100").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("host101").isPresent());
}
@Test
@@ -128,50 +133,50 @@ public class DynamicProvisioningMaintainerTest {
ClusterCapacity.class);
assertEquals(0, tester.hostProvisioner.provisionedHosts().size());
- assertEquals(11, tester.nodeRepository.nodes().list().size());
+ assertEquals(9, tester.nodeRepository.nodes().list().size());
assertTrue(tester.nodeRepository.nodes().node("host2").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host2-1").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host3").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname100").isEmpty());
+ assertTrue(tester.nodeRepository.nodes().node("host100").isEmpty());
- // The first cluster will be allocated to host3 and a new host hostname100.
- // hostname100 will be a large shared host specified above.
- tester.maintainer.maintain();
+ // The first cluster will be allocated to host3 and a new host host100.
+ // host100 will be a large shared host specified above.
+ tester.maintain();
verifyFirstMaintain(tester);
// Second maintain should be a no-op, otherwise we did wrong in the first maintain.
- tester.maintainer.maintain();
+ tester.maintain();
verifyFirstMaintain(tester);
- // Add a second cluster equal to the first. It should fit on existing host3 and hostname100.
+ // Add a second cluster equal to the first. It should fit on existing host3 and host100.
tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(),
List.of(new ClusterCapacity(2, 1, 30, 20, 3.0),
new ClusterCapacity(2, 1, 30, 20, 3.0)),
ClusterCapacity.class);
- tester.maintainer.maintain();
+ tester.maintain();
verifyFirstMaintain(tester);
- // Change second cluster such that it doesn't fit on host3, but does on hostname100,
+ // Change second cluster such that it doesn't fit on host3, but does on host100,
// and with a size of 2 it should allocate a new shared host.
// The node allocation code prefers to allocate to the shared hosts instead of host3 (at least
- // in this test, due to skew), so host3 will be deprovisioned when hostname101 is provisioned.
- // host3 is a 24-64-100-10 while hostname100 is 48-128-1000-10.
+ // in this test, due to skew), so host3 will be deprovisioned when host101 is provisioned.
+ // host3 is a 24-64-100-10 while host100 is 48-128-1000-10.
tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(),
List.of(new ClusterCapacity(2, 1, 30, 20, 3.0),
new ClusterCapacity(2, 24, 64, 100, 1.0)),
ClusterCapacity.class);
- tester.maintainer.maintain();
+ tester.maintain();
assertEquals(2, tester.hostProvisioner.provisionedHosts().size());
assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(10, tester.nodeRepository.nodes().list().size()); // 3 removed, 2 added
+ assertEquals(8, tester.nodeRepository.nodes().list().size()); // 3 removed, 2 added
assertTrue("preprovision capacity is prefered on shared hosts", tester.nodeRepository.nodes().node("host3").isEmpty());
- assertTrue(tester.nodeRepository.nodes().node("hostname100").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname101").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host100").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host101").isPresent());
// If the preprovision capacity is reduced, we should see shared hosts deprovisioned.
@@ -179,18 +184,18 @@ public class DynamicProvisioningMaintainerTest {
List.of(new ClusterCapacity(1, 1, 30, 20, 3.0)),
ClusterCapacity.class);
- tester.maintainer.maintain();
+ tester.maintain();
assertEquals("one provisioned host has been deprovisioned, so there are 2 -> 1 provisioned hosts",
1, tester.hostProvisioner.provisionedHosts().size());
assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(9, tester.nodeRepository.nodes().list().size()); // 4 removed, 2 added
- if (tester.nodeRepository.nodes().node("hostname100").isPresent()) {
- assertTrue("hostname101 is superfluous and should have been deprovisioned",
- tester.nodeRepository.nodes().node("hostname101").isEmpty());
+ assertEquals(7, tester.nodeRepository.nodes().list().size()); // 4 removed, 2 added
+ if (tester.nodeRepository.nodes().node("host100").isPresent()) {
+ assertTrue("host101 is superfluous and should have been deprovisioned",
+ tester.nodeRepository.nodes().node("host101").isEmpty());
} else {
- assertTrue("hostname101 is required for preprovision capacity",
- tester.nodeRepository.nodes().node("hostname101").isPresent());
+ assertTrue("host101 is required for preprovision capacity",
+ tester.nodeRepository.nodes().node("host101").isPresent());
}
}
@@ -198,11 +203,11 @@ public class DynamicProvisioningMaintainerTest {
private void verifyFirstMaintain(DynamicProvisioningTester tester) {
assertEquals(1, tester.hostProvisioner.provisionedHosts().size());
assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(10, tester.nodeRepository.nodes().list().size()); // 2 removed, 1 added
+ assertEquals(8, tester.nodeRepository.nodes().list().size()); // 2 removed, 1 added
assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.nodes().node("host2").isEmpty());
assertTrue("Node on deprovisioned host removed", tester.nodeRepository.nodes().node("host2-1").isEmpty());
assertTrue("One 1-30-20-3 node fits on host3", tester.nodeRepository.nodes().node("host3").isPresent());
- assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("hostname100").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("host100").isPresent());
}
@Test
@@ -239,12 +244,12 @@ public class DynamicProvisioningMaintainerTest {
tester.hostProvisioner.overrideHostFlavor("host4");
tester.flagSource.withJacksonFlag(PermanentFlags.SHARED_HOST.id(), new SharedHost(null, minCount), SharedHost.class);
- tester.maintainer.maintain();
+ tester.maintain();
assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts().size());
assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts());
// Verify next maintain is a no-op
- tester.maintainer.maintain();
+ tester.maintain();
assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts().size());
assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts());
}
@@ -255,7 +260,7 @@ public class DynamicProvisioningMaintainerTest {
Node host2 = tester.addNode("host2", Optional.empty(), NodeType.host, Node.State.failed, DynamicProvisioningTester.tenantApp);
tester.hostProvisioner.with(Behaviour.failDeprovisioning);
- tester.maintainer.maintain();
+ tester.maintain();
assertTrue(tester.nodeRepository.nodes().node(host2.hostname()).isPresent());
}
@@ -266,7 +271,7 @@ public class DynamicProvisioningMaintainerTest {
tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(),
List.of(new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), resources1.bandwidthGbps())),
ClusterCapacity.class);
- tester.maintainer.maintain();
+ tester.maintain();
// Hosts are provisioned
assertEquals(2, tester.provisionedHostsMatching(resources1));
@@ -312,10 +317,10 @@ public class DynamicProvisioningMaintainerTest {
List.of(new ClusterCapacity(3, 0, 0, 0, 0.0)),
ClusterCapacity.class);
assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources));
- assertTrue(tester.nodeRepository.nodes().node("hostname102").isEmpty());
- tester.maintainer.maintain();
+ assertTrue(tester.nodeRepository.nodes().node("host102").isEmpty());
+ tester.maintain();
assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources));
- assertTrue(tester.nodeRepository.nodes().node("hostname102").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host102").isPresent());
// Next maintenance run does nothing
tester.assertNodesUnchanged();
@@ -340,14 +345,14 @@ public class DynamicProvisioningMaintainerTest {
ClusterCapacity.class);
assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources));
- assertTrue(tester.nodeRepository.nodes().node("hostname102").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname103").isEmpty());
- assertTrue(tester.nodeRepository.nodes().node("hostname104").isEmpty());
- tester.maintainer.maintain();
+ assertTrue(tester.nodeRepository.nodes().node("host102").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host103").isEmpty());
+ assertTrue(tester.nodeRepository.nodes().node("host104").isEmpty());
+ tester.maintain();
assertEquals(3, tester.provisionedHostsMatching(sharedHostNodeResources));
- assertTrue(tester.nodeRepository.nodes().node("hostname102").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname103").isPresent());
- assertTrue(tester.nodeRepository.nodes().node("hostname104").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host102").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host103").isPresent());
+ assertTrue(tester.nodeRepository.nodes().node("host104").isPresent());
}
@Test
@@ -360,7 +365,7 @@ public class DynamicProvisioningMaintainerTest {
// cfghost3 is active before maintain, and active after:
assertCfghost3IsActive(tester);
- tester.maintainer.maintain();
+ tester.maintain();
assertCfghost3IsActive(tester);
// But when cfghost3 is moved to parked w/wantToDeprovision, maintain() should deprovision
@@ -369,7 +374,7 @@ public class DynamicProvisioningMaintainerTest {
Agent.operator,
Instant.now());
tester.nodeRepository.database().writeTo(Node.State.parked, parkedWithWantToDeprovision, Agent.operator, Optional.empty());
- tester.maintainer.maintain();
+ tester.maintain();
assertCfghost3IsDeprovisioned(tester);
}
@@ -452,7 +457,7 @@ public class DynamicProvisioningMaintainerTest {
} catch (IllegalArgumentException ignored) {}
// Host and child is removed
- dynamicProvisioningTester.maintainer.maintain();
+ dynamicProvisioningTester.maintain();
allNodes = tester.nodeRepository().nodes().list();
assertEquals(2, allNodes.nodeType(hostType).size());
assertEquals(2, allNodes.nodeType(hostType.childNodeType()).size());
@@ -470,7 +475,7 @@ public class DynamicProvisioningMaintainerTest {
Node newNode = tester.nodeRepository().nodes().list(Node.State.reserved).nodeType(hostType.childNodeType()).first().get();
// Resume provisioning and activate host
- dynamicProvisioningTester.maintainer.maintain();
+ dynamicProvisioningTester.maintain();
List<ProvisionedHost> newHosts = dynamicProvisioningTester.hostProvisioner.provisionedHosts();
assertEquals(1, newHosts.size());
tester.nodeRepository().nodes().setReady(newHosts.get(0).hostHostname(), Agent.operator, getClass().getSimpleName());
@@ -544,7 +549,7 @@ public class DynamicProvisioningMaintainerTest {
}
// Host and children remain parked because one child is still active
- tester.maintainer.maintain();
+ tester.maintain();
for (var node : List.of(host4, host41)) {
assertEquals(Node.State.parked, tester.nodeRepository.nodes().node(node.hostname()).get().state());
}
@@ -555,14 +560,14 @@ public class DynamicProvisioningMaintainerTest {
tester.nodeRepository.nodes().park(host42.hostname(), false, Agent.system, getClass().getSimpleName());
// Host and children can now be removed
- tester.maintainer.maintain();
+ tester.maintain();
for (var node : List.of(host4, host41, host42, host43)) {
assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty());
}
}
private void provisionHostsIn(CloudAccount cloudAccount, int count, DynamicProvisioningTester tester) {
- tester.maintainer.maintain();
+ tester.maintain();
List<String> provisionedHostnames = tester.hostProvisioner.provisionedHosts().stream()
.filter(host -> host.cloudAccount().equals(cloudAccount))
.map(ProvisionedHost::hostHostname)
@@ -571,7 +576,7 @@ public class DynamicProvisioningMaintainerTest {
for (var hostname : provisionedHostnames) {
tester.provisioningTester.nodeRepository().nodes().setReady(hostname, Agent.operator, getClass().getSimpleName());
}
- tester.provisioningTester.prepareAndActivateInfraApplication(DynamicProvisioningTester.tenantHostApp, NodeType.host);
+ tester.provisioningTester.prepareAndActivateInfraApplication(DynamicProvisioningTester.tenantHostApp.getApplicationId(), NodeType.host);
NodeList activeHosts = tester.provisioningTester.nodeRepository().nodes()
.list(Node.State.active)
.nodeType(NodeType.host)
@@ -596,18 +601,21 @@ public class DynamicProvisioningMaintainerTest {
private static class DynamicProvisioningTester {
+ private static final InfraApplication tenantHostApp = new TenantHostApplication();
+ private static final InfraApplication configServerHostApp = new ConfigServerHostApplication();
+ private static final InfraApplication configServerApp = new ConfigServerApplication();
private static final ApplicationId tenantApp = ApplicationId.from("mytenant", "myapp", "default");
- private static final ApplicationId tenantHostApp = ApplicationId.from("vespa", "tenant-host", "default");
- private static final ApplicationId proxyHostApp = ApplicationId.from("vespa", "proxy-host", "default");
- private static final ApplicationId proxyApp = ApplicationId.from("vespa", "proxy", "default");
private static final NodeFlavors flavors = FlavorConfigBuilder.createDummies("default", "docker", "host2", "host3", "host4");
private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
private final NodeRepository nodeRepository;
private final MockHostProvisioner hostProvisioner;
- private final DynamicProvisioningMaintainer maintainer;
private final ProvisioningTester provisioningTester;
+ private final HostCapacityMaintainer capacityMaintainer;
+ private final HostResumeProvisioner resumeProvisioner;
+ private final HostDeprovisioner deprovisioner;
+ private final InfraDeployerImpl infraDeployer;
public DynamicProvisioningTester() {
this(Cloud.builder().dynamicProvisioning(true).build(), new MockNameResolver());
@@ -624,37 +632,38 @@ public class DynamicProvisioningMaintainerTest {
.hostProvisioner(hostProvisioner)
.build();
this.nodeRepository = provisioningTester.nodeRepository();
- this.maintainer = new DynamicProvisioningMaintainer(nodeRepository,
- Duration.ofDays(1),
- hostProvisioner,
- flagSource,
- new TestMetric());
+ this.capacityMaintainer = new HostCapacityMaintainer(
+ nodeRepository, Duration.ofDays(1), hostProvisioner, flagSource, new TestMetric());
+ this.resumeProvisioner = new HostResumeProvisioner(nodeRepository, Duration.ofDays(1), new TestMetric(), hostProvisioner);
+ this.deprovisioner = new HostDeprovisioner(nodeRepository, Duration.ofDays(1), new TestMetric(), hostProvisioner);
+
+ MockDuperModel mockDuperModel = new MockDuperModel()
+ .support(configServerHostApp).support(tenantHostApp);
+ this.infraDeployer = new InfraDeployerImpl(nodeRepository, provisioningTester.provisioner(), mockDuperModel);
}
private DynamicProvisioningTester addInitialNodes() {
- List.of(createNode("host1", Optional.empty(), NodeType.host, Node.State.active, Optional.of(tenantHostApp)),
- createNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)),
- createNode("host1-2", Optional.of("host1"), NodeType.tenant, Node.State.failed, Optional.empty()),
- createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, Optional.of(tenantHostApp)),
- createNode("host2-1", Optional.of("host2"), NodeType.tenant, Node.State.failed, Optional.empty()),
- createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty(),
+ List.of(createNode("host1", Optional.empty(), NodeType.host, Node.State.active, tenantHostApp.getApplicationId()),
+ createNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.reserved, tenantApp),
+ createNode("host1-2", Optional.of("host1"), NodeType.tenant, Node.State.failed, tenantApp),
+ createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, tenantHostApp.getApplicationId()),
+ createNode("host2-1", Optional.of("host2"), NodeType.tenant, Node.State.failed, tenantApp),
+ createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, null,
"host3-1", "host3-2", "host3-3", "host3-4", "host3-5"),
- createNode("host4", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty()),
- createNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)),
- createNode("proxyhost1", Optional.empty(), NodeType.proxyhost, Node.State.provisioned, Optional.empty()),
- createNode("proxyhost2", Optional.empty(), NodeType.proxyhost, Node.State.active, Optional.of(proxyHostApp)),
- createNode("proxy2", Optional.of("proxyhost2"), NodeType.proxy, Node.State.active, Optional.of(proxyApp)))
+ createNode("host4", Optional.empty(), NodeType.host, Node.State.provisioned, null),
+ createNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.reserved, tenantApp),
+ createNode("host4-2", Optional.of("host4"), NodeType.tenant, Node.State.reserved, tenantApp))
.forEach(node -> nodeRepository.database().addNodesInState(List.of(node), node.state(), Agent.system));
return this;
}
private Node addCfghost(int index, boolean makeChild) {
Node cfghost = addNode("cfghost" + index, Optional.empty(), NodeType.confighost,
- Node.State.active, new ConfigServerHostApplication().getApplicationId());
+ Node.State.active, configServerHostApp.getApplicationId());
if (makeChild) {
addNode("cfg" + index, Optional.of("cfghost" + index), NodeType.config,
- Node.State.active, new ConfigServerApplication().getApplicationId());
+ Node.State.active, configServerApp.getApplicationId());
}
return cfghost;
@@ -665,14 +674,14 @@ public class DynamicProvisioningMaintainerTest {
}
private Node addNode(String hostname, Optional<String> parentHostname, NodeType nodeType, Node.State state, ApplicationId application) {
- Node node = createNode(hostname, parentHostname, nodeType, state, Optional.ofNullable(application));
+ Node node = createNode(hostname, parentHostname, nodeType, state, application);
return nodeRepository.database().addNodesInState(List.of(node), node.state(), Agent.system).get(0);
}
private Node createNode(String hostname, Optional<String> parentHostname, NodeType nodeType,
- Node.State state, Optional<ApplicationId> application, String... additionalHostnames) {
+ Node.State state, ApplicationId application, String... additionalHostnames) {
Flavor flavor = nodeRepository.flavors().getFlavor(parentHostname.isPresent() ? "docker" : "host3").orElseThrow();
- Optional<Allocation> allocation = application
+ Optional<Allocation> allocation = Optional.ofNullable(application)
.map(app -> new Allocation(
app,
ClusterMembership.from("container/default/0/0", Version.fromString("7.3"), Optional.empty()),
@@ -684,6 +693,8 @@ public class DynamicProvisioningMaintainerTest {
.ipConfig(new IP.Config(state == Node.State.active ? Set.of("::1") : Set.of(), Set.of(), addresses));
parentHostname.ifPresent(builder::parentHostname);
allocation.ifPresent(builder::allocation);
+ if (hostname.equals("host2-1"))
+ builder.status(Status.initial().withWantToRetire(true, true, false));
return builder.build();
}
@@ -695,10 +706,17 @@ public class DynamicProvisioningMaintainerTest {
private void assertNodesUnchanged() {
NodeList nodes = nodeRepository.nodes().list();
- maintainer.maintain();
+ maintain();
assertEquals("Nodes are unchanged after maintenance run", nodes, nodeRepository.nodes().list());
}
+ private void maintain() {
+ resumeProvisioner.maintain();
+ capacityMaintainer.maintain();
+ infraDeployer.activateAllSupportedInfraApplications(true);
+ deprovisioner.maintain();
+ }
+
}
}
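
With removal split in two, the tester's maintain() above must drive the whole pipeline rather than a single maintainer, and the order matters: capacity maintenance marks excess hosts, infra activation lets retired hosts reach parked, and only then can the deprovisioner remove them. A trivial illustration of that ordering, with Runnable stand-ins for the maintainers:

    import java.util.List;

    final class MaintenancePipeline {
        public static void main(String[] args) {
            List<Runnable> steps = List.of(
                    () -> System.out.println("1. resume provisioning of pending hosts"),
                    () -> System.out.println("2. converge capacity; mark excess hosts"),
                    () -> System.out.println("3. activate infra apps (retired hosts park)"),
                    () -> System.out.println("4. deprovision parked/failed marked hosts"));
            steps.forEach(Runnable::run); // reordering these steps would strand marked hosts
        }
    }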