aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2023-06-07 22:48:03 +0200
committerGitHub <noreply@github.com>2023-06-07 22:48:03 +0200
commit5a0f33a14fb7a2a50850a78d7673d837a49d2f28 (patch)
tree115d9245faef7ceb5ec1dd26944a78b5be785a5f /node-repository
parent59c924d6ad51d19667c86afc072fa67e32066e6a (diff)
parentf3953b0b098dfca3d3c101897699ec1d283fdd29 (diff)
Merge pull request #27336 from vespa-engine/mpolden/always-track-deprovisioned-hosts
Track deprovisioned hosts in dynamically provisioned zones
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java31
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java3
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirerTest.java56
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java38
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json3
8 files changed, 120 insertions, 26 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java
new file mode 100644
index 00000000000..fcfde53c0df
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.History;
+
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * This removes hosts from {@link com.yahoo.vespa.hosted.provision.Node.State#deprovisioned}, after a grace period.
+ *
+ * @author mpolden
+ */
+public class DeprovisionedExpirer extends Expirer {
+
+ DeprovisionedExpirer(NodeRepository nodeRepository, Duration expiryTime, Metric metric) {
+ super(Node.State.deprovisioned, History.Event.Type.deprovisioned, nodeRepository, expiryTime, metric);
+ }
+
+ @Override
+ protected void expire(List<Node> expired) {
+ if (!nodeRepository().zone().cloud().dynamicProvisioning()) return; // Never expire in statically provisioned zones
+ for (var node : expired) {
+ nodeRepository().nodes().forget(node);
+ }
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
index 5a77fcca85c..a8929cf9d22 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
@@ -12,8 +12,7 @@ import java.util.List;
import java.util.logging.Logger;
/**
- * Superclass of expiry tasks which moves nodes from some state to the dirty state.
- * These jobs runs at least every 25 minutes.
+ * Base class for maintenance of nodes that linger in a particular state too long.
*
* @author bratseth
*/
@@ -61,6 +60,6 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
/** Implement this callback to take action to expire these nodes */
- protected abstract void expire(List<Node> node);
+ protected abstract void expire(List<Node> expired);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index c7d5efe376b..f6391a7d475 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -60,6 +60,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
maintainers.add(new AutoscalingMaintainer(nodeRepository, deployer, metric, defaults.autoscalingInterval));
maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, defaults.scalingSuggestionsInterval, metric));
maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer));
+ maintainers.add(new DeprovisionedExpirer(nodeRepository, defaults.deprovisionedExpiry, metric));
provisionServiceProvider.getLoadBalancerService()
.map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric))
@@ -121,6 +122,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration switchRebalancerInterval;
private final Duration hostRetirerInterval;
private final Duration hostFlavorUpgraderInterval;
+ private final Duration deprovisionedExpiry;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -155,6 +157,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
hostRetirerInterval = Duration.ofMinutes(30);
hostFlavorUpgraderInterval = Duration.ofMinutes(30);
+ // CD (de)provisions hosts frequently. Expire deprovisioned ones earlier
+ deprovisionedExpiry = isCdZone ? Duration.ofDays(1) : Duration.ofDays(30);
if (zone.environment().isProduction() && ! isCdZone) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 5426d818db5..cc49265506c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -520,11 +520,7 @@ public class Nodes {
db.removeNodes(removed, transaction);
} else {
removed = removeChildren(node, force, transaction);
- if (zone.cloud().dynamicProvisioning()) {
- db.removeNodes(List.of(node), transaction);
- } else {
- move(node.hostname(), Node.State.deprovisioned, Agent.system, false, Optional.empty(), transaction);
- }
+ move(node.hostname(), Node.State.deprovisioned, Agent.system, false, Optional.empty(), transaction);
removed.add(node);
}
transaction.commit();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index e24b6fcf7f5..b202a10c4d3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -362,6 +362,7 @@ class NodeAllocation {
// Infrastructure hosts have fixed indices, starting at 1
Set<Integer> currentIndices = allNodes.nodeType(hostType)
+ .not().state(Node.State.deprovisioned)
.hostnames()
.stream()
// TODO(mpolden): Use cluster index instead of parsing hostname, once all
@@ -375,7 +376,7 @@ class NodeAllocation {
indices.add(i);
}
}
- // Ignore our own index as we should never try to provision ourself. This can happen in the following scenario:
+ // Ignore our own index as we should never try to provision ourselves. This can happen in the following scenario:
// - cfg1 has been deprovisioned
// - cfg2 has triggered provisioning of a new cfg1
// - cfg1 is starting and redeploys its infrastructure application during bootstrap. A deficit is detected
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirerTest.java
new file mode 100644
index 00000000000..4100fe39eca
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirerTest.java
@@ -0,0 +1,56 @@
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.NodeFlavors;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner;
+import org.junit.jupiter.api.Test;
+
+import java.time.Duration;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
+
+/**
+ * @author mpolden
+ */
+class DeprovisionedExpirerTest {
+
+ private final NodeFlavors flavors = FlavorConfigBuilder.createDummies("host");
+ private final ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning()
+ .flavors(flavors.getFlavors())
+ .hostProvisioner(new MockHostProvisioner(flavors.getFlavors()))
+ .build();
+ private final DeprovisionedExpirer expirer = new DeprovisionedExpirer(tester.nodeRepository(), Duration.ofDays(30),
+ new TestMetric());
+
+ @Test
+ public void maintain() {
+ tester.makeReadyHosts(1, new NodeResources(2,4,8,1))
+ .activateTenantHosts();
+ NodeList hosts = tester.nodeRepository().nodes().list().state(Node.State.active);
+ assertEquals(1, hosts.size());
+
+ // Remove host
+ String hostname = hosts.first().get().hostname();
+ tester.nodeRepository().nodes().park(hostname, false, Agent.system, getClass().getSimpleName());
+ tester.nodeRepository().nodes().removeRecursively(hostname);
+ assertSame(Node.State.deprovisioned, tester.node(hostname).state());
+
+ // Host is not removed until expiry passes
+ assertExpiredAfter(Duration.ZERO, false, hostname);
+ assertExpiredAfter(Duration.ofDays(15), false, hostname);
+ assertExpiredAfter(Duration.ofDays(15), true, hostname);
+ }
+
+ private void assertExpiredAfter(Duration duration, boolean expired, String hostname) {
+ tester.clock().advance(duration);
+ expirer.maintain();
+ assertEquals(expired, tester.nodeRepository().nodes().node(hostname).isEmpty());
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index cfa76419262..ead94663807 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -83,7 +83,7 @@ public class HostCapacityMaintainerTest {
assertTrue(tester.nodeRepository.nodes().node("host3").isPresent());
tester.maintain();
- assertTrue(tester.nodeRepository.nodes().node("host2").isEmpty());
+ assertSame(State.deprovisioned, tester.nodeRepository.nodes().node("host2").get().state());
}
@Test
@@ -94,7 +94,7 @@ public class HostCapacityMaintainerTest {
assertTrue(failedHost.isPresent());
tester.maintain();
- assertTrue("Failed host is deprovisioned", tester.nodeRepository.nodes().node(failedHost.get().hostname()).isEmpty());
+ assertSame("Failed host is deprovisioned", State.deprovisioned, tester.nodeRepository.nodes().node(failedHost.get().hostname()).get().state());
assertEquals(1, tester.hostProvisioner.deprovisionedHosts());
}
@@ -118,9 +118,9 @@ public class HostCapacityMaintainerTest {
assertEquals(2, tester.hostProvisioner.provisionedHosts().size());
assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- NodeList nodesAfter = tester.nodeRepository.nodes().list();
+ NodeList nodesAfter = tester.nodeRepository.nodes().list().not().state(State.deprovisioned);
assertEquals(9, nodesAfter.size()); // 2 removed, 2 added
- assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.nodes().node("host2").isEmpty());
+ assertSame("Failed host 'host2' is deprovisioned", State.deprovisioned, tester.nodeRepository.nodes().node("host2").get().state());
assertTrue("Node on deprovisioned host removed", tester.nodeRepository.nodes().node("host2-1").isEmpty());
assertTrue("Host satisfying 16-24-100-1 is kept", tester.nodeRepository.nodes().node("host3").isPresent());
assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("host100").isPresent());
@@ -177,8 +177,8 @@ public class HostCapacityMaintainerTest {
assertEquals(2, tester.hostProvisioner.provisionedHosts().size());
assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(8, tester.nodeRepository.nodes().list().size()); // 3 removed, 2 added
- assertTrue("preprovision capacity is prefered on shared hosts", tester.nodeRepository.nodes().node("host3").isEmpty());
+ assertEquals(8, tester.nodeRepository.nodes().list().not().state(State.deprovisioned).size()); // 3 removed, 2 added
+ assertSame("preprovision capacity is prefered on shared hosts", State.deprovisioned, tester.nodeRepository.nodes().node("host3").get().state());
assertTrue(tester.nodeRepository.nodes().node("host100").isPresent());
assertTrue(tester.nodeRepository.nodes().node("host101").isPresent());
@@ -193,13 +193,13 @@ public class HostCapacityMaintainerTest {
assertEquals("one provisioned host has been deprovisioned, so there are 2 -> 1 provisioned hosts",
1, tester.hostProvisioner.provisionedHosts().size());
assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(7, tester.nodeRepository.nodes().list().size()); // 4 removed, 2 added
+ assertEquals(7, tester.nodeRepository.nodes().list().not().state(State.deprovisioned).size()); // 4 removed, 2 added
if (tester.nodeRepository.nodes().node("host100").isPresent()) {
- assertTrue("host101 is superfluous and should have been deprovisioned",
- tester.nodeRepository.nodes().node("host101").isEmpty());
+ assertSame("host101 is superfluous and should have been deprovisioned", State.deprovisioned,
+ tester.nodeRepository.nodes().node("host101").get().state());
} else {
assertTrue("host101 is required for preprovision capacity",
- tester.nodeRepository.nodes().node("host101").isPresent());
+ tester.nodeRepository.nodes().node("host101").isPresent());
}
}
@@ -207,8 +207,8 @@ public class HostCapacityMaintainerTest {
private void verifyFirstMaintain(DynamicProvisioningTester tester) {
assertEquals(1, tester.hostProvisioner.provisionedHosts().size());
assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
- assertEquals(8, tester.nodeRepository.nodes().list().size()); // 2 removed, 1 added
- assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.nodes().node("host2").isEmpty());
+ assertEquals(8, tester.nodeRepository.nodes().list().not().state(State.deprovisioned).size()); // 2 removed, 1 added
+ assertSame("Failed host 'host2' is deprovisioned", State.deprovisioned, tester.nodeRepository.nodes().node("host2").get().state());
assertTrue("Node on deprovisioned host removed", tester.nodeRepository.nodes().node("host2-1").isEmpty());
assertTrue("One 1-30-20-3 node fits on host3", tester.nodeRepository.nodes().node("host3").isPresent());
assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.nodes().node("host100").isPresent());
@@ -390,7 +390,7 @@ public class HostCapacityMaintainerTest {
tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
// Expected number of hosts and children are provisioned
- NodeList allNodes = tester.nodeRepository().nodes().list();
+ NodeList allNodes = tester.nodeRepository().nodes().list().not().state(State.deprovisioned);
NodeList configHosts = allNodes.nodeType(hostType);
NodeList configNodes = allNodes.nodeType(hostType.childNodeType());
assertEquals(3, configHosts.size());
@@ -427,7 +427,7 @@ public class HostCapacityMaintainerTest {
// Host and child is removed
dynamicProvisioningTester.maintain();
- allNodes = tester.nodeRepository().nodes().list();
+ allNodes = tester.nodeRepository().nodes().list().not().state(State.deprovisioned);
assertEquals(2, allNodes.nodeType(hostType).size());
assertEquals(2, allNodes.nodeType(hostType.childNodeType()).size());
@@ -585,7 +585,7 @@ public class HostCapacityMaintainerTest {
// Host and children can now be removed.
tester.provisioningTester.activateTenantHosts();
tester.maintain();
- assertEquals(List.of(), tester.nodeRepository.nodes().list().asList());
+ assertEquals(List.of(), tester.nodeRepository.nodes().list().not().state(State.deprovisioned).asList());
}
@Test
@@ -617,7 +617,11 @@ public class HostCapacityMaintainerTest {
// Host and children can now be removed.
tester.maintain();
for (var node : List.of(host4, host41, host42, host43)) {
- assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty());
+ if (node.type().isHost()) {
+ assertSame(node.hostname() + " moved to deprovisioned", State.deprovisioned, tester.nodeRepository.nodes().node(node.hostname()).get().state());
+ } else {
+ assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty());
+ }
}
}
@@ -651,7 +655,7 @@ public class HostCapacityMaintainerTest {
private void assertCfghost3IsDeprovisioned(DynamicProvisioningTester tester) {
assertEquals(4, tester.nodeRepository.nodes().list(Node.State.active).size());
assertEquals(2, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.confighost).size());
- assertTrue(tester.nodeRepository.nodes().node("cfghost3").isEmpty());
+ assertSame(State.deprovisioned, tester.nodeRepository.nodes().node("cfghost3").get().state());
}
private static class DynamicProvisioningTester {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
index 4c8c5d80018..f39a086e97a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
@@ -4,6 +4,9 @@
"name": "AutoscalingMaintainer"
},
{
+ "name": "DeprovisionedExpirer"
+ },
+ {
"name": "DirtyExpirer"
},
{