diff options
author | Martin Polden <mpolden@mpolden.no> | 2022-06-01 13:38:08 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2022-06-01 15:01:43 +0200 |
commit | 8bf4fe891fa15414d441a29b02db98141847259e (patch) | |
tree | 3765ff04832218d07d3c20aed135bf909dc266a5 /node-repository/src | |
parent | 6e65f1d8e061964a0db4a967e5069442fe0e1b74 (diff) |
Implement HostRetirer
Diffstat (limited to 'node-repository/src')
5 files changed, 148 insertions, 1 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirer.java new file mode 100644 index 00000000000..aa3a82d52c9 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirer.java @@ -0,0 +1,61 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.CloudAccount; +import com.yahoo.config.provision.HostEvent; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; + +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * Retire and deprovision hosts that are scheduled for maintenance by the cloud provider. + * + * Only applies to dynamically provisioned zones, where a replacement host will be provisioned. + * + * @author mpolden + */ +public class HostRetirer extends NodeRepositoryMaintainer { + + private static final Logger LOG = Logger.getLogger(HostRetirer.class.getName()); + + private final HostProvisioner hostProvisioner; + + public HostRetirer(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) { + super(nodeRepository, interval, metric); + this.hostProvisioner = Objects.requireNonNull(hostProvisioner); + } + + @Override + protected double maintain() { + if (!nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0; + + NodeList candidates = nodeRepository().nodes().list() + .parents() + .not().deprovisioning(); + List<CloudAccount> cloudAccounts = candidates.stream().flatMap(c -> c.cloudAccount().stream()) + .distinct() + .collect(Collectors.toList()); + Map<String, List<HostEvent>> eventsByHostId = hostProvisioner.hostEventsIn(cloudAccounts).stream() + .collect(Collectors.groupingBy(HostEvent::hostId)); + Instant now = nodeRepository().clock().instant(); + for (var host : candidates) { + List<HostEvent> events = eventsByHostId.get(host.id()); + if (events == null || events.isEmpty()) continue; + + LOG.info("Deprovisioning " + host + " affected by maintenance event" + (events.size() > 1 ? "s" : "") + ": " + events); + nodeRepository().nodes().deprovision(host.hostname(), Agent.system, now); + } + return 1.0; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 29642bc25dd..5e703139f41 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -72,6 +72,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent { provisionServiceProvider.getHostProvisioner() .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)) .ifPresent(maintainers::add); + provisionServiceProvider.getHostProvisioner() + .map(hostProvisioner -> new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner)) + .ifPresent(maintainers::add); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); } @@ -116,6 +119,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration autoscalingInterval; private final Duration scalingSuggestionsInterval; private final Duration switchRebalancerInterval; + private final Duration hostRetirerInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -145,6 +149,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { throttlePolicy = NodeFailer.ThrottlePolicy.hosted; inactiveConfigServerExpiry = Duration.ofMinutes(5); inactiveControllerExpiry = Duration.ofMinutes(5); + hostRetirerInterval = Duration.ofMinutes(30); if (zone.environment().isProduction() && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index b849fccfaa5..567fa9098c9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -5,6 +5,7 @@ import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.HostEvent; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; @@ -14,7 +15,7 @@ import java.util.Optional; import java.util.Set; /** - * Service for provisioning physical docker tenant hosts inside the zone. + * A service which supports provisioning container hosts dynamically. * * @author freva */ @@ -78,4 +79,10 @@ public interface HostProvisioner { */ void deprovision(Node host); + /** + * Returns the maintenance events scheduled for hosts in this zone, in given cloud accounts. Host events in the + * zone's default cloud account are always included. + */ + List<HostEvent> hostEventsIn(List<CloudAccount> cloudAccounts); + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index c09376ff103..8d60dd30dd1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.HostEvent; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.NodeAllocationException; @@ -32,6 +33,7 @@ import java.util.stream.IntStream; public class MockHostProvisioner implements HostProvisioner { private final List<ProvisionedHost> provisionedHosts = new ArrayList<>(); + private final List<HostEvent> hostEvents = new ArrayList<>(); private final List<Flavor> flavors; private final MockNameResolver nameResolver; private final int memoryTaxGb; @@ -100,6 +102,11 @@ public class MockHostProvisioner implements HostProvisioner { deprovisionedHosts++; } + @Override + public List<HostEvent> hostEventsIn(List<CloudAccount> cloudAccounts) { + return Collections.unmodifiableList(hostEvents); + } + /** Returns the hosts that have been provisioned by this */ public List<ProvisionedHost> provisionedHosts() { return Collections.unmodifiableList(provisionedHosts); @@ -130,6 +137,11 @@ public class MockHostProvisioner implements HostProvisioner { return this; } + public MockHostProvisioner addEvent(HostEvent event) { + hostEvents.add(event); + return this; + } + public boolean compatible(Flavor flavor, NodeResources resources) { NodeResources resourcesToVerify = resources.withMemoryGb(resources.memoryGb() - memoryTaxGb); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java new file mode 100644 index 00000000000..a46946b7cee --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.Cloud; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.HostEvent; +import com.yahoo.config.provision.NodeFlavors; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; +import com.yahoo.jdisc.test.MockMetric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author mpolden + */ +public class HostRetirerTest { + + @Test + public void retire_hosts() { + NodeFlavors flavors = FlavorConfigBuilder.createDummies("default"); + MockHostProvisioner hostProvisioner = new MockHostProvisioner(flavors.getFlavors()); + ProvisioningTester tester = new ProvisioningTester.Builder().hostProvisioner(hostProvisioner) + .flavors(flavors.getFlavors()) + .zone(new Zone(Cloud.builder() + .dynamicProvisioning(true) + .build(), SystemName.defaultSystem(), + Environment.defaultEnvironment(), + RegionName.defaultName())) + .build(); + HostRetirer retirer = new HostRetirer(tester.nodeRepository(), Duration.ofDays(1), new MockMetric(), hostProvisioner); + tester.makeReadyHosts(3, new NodeResources(24, 48, 1000, 10)) + .activateTenantHosts(); + List<String> hostIds = tester.nodeRepository().nodes().list(Node.State.active).mapToList(Node::id); + + // No events scheduled + retirer.maintain(); + NodeList hosts = tester.nodeRepository().nodes().list(); + assertEquals(0, hosts.deprovisioning().size()); + + // Event is scheduled for one known host + hostProvisioner.addEvent(new HostEvent("event0", hostIds.get(1), getClass().getSimpleName())) + .addEvent(new HostEvent("event1", "unknown-host-id", getClass().getSimpleName())); + + // Next run retires host + retirer.maintain(); + hosts = tester.nodeRepository().nodes().list(); + assertEquals(1, hosts.deprovisioning().size()); + } + +} |