diff options
author | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-11-01 15:36:37 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-11-02 15:58:06 +0100 |
commit | 6688797036b4239ba58c8774f4c0893ed660bbc9 (patch) | |
tree | 9140fceebfa305b3997e9ddd1bfb232411afb17e /node-repository/src | |
parent | 56e467f72da59b3efbfaff574c22e79542f9d17e (diff) |
Move disk replacer to separate maintainer
Diffstat (limited to 'node-repository/src')
6 files changed, 107 insertions, 54 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java new file mode 100644 index 00000000000..acd5cb61d81 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java @@ -0,0 +1,56 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeMutex; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; + +import java.time.Duration; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Rebuilds hosts by replacing the root disk (only supports hosts with remote storage). + * + * @author mpolden + */ +public class DiskReplacer extends NodeRepositoryMaintainer { + + private static final Logger log = Logger.getLogger(DiskReplacer.class.getName()); + + private final HostProvisioner hostProvisioner; + + DiskReplacer(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) { + super(nodeRepository, interval, metric); + this.hostProvisioner = hostProvisioner; + } + + @Override + protected double maintain() { + NodeList nodes = nodeRepository().nodes().list().rebuilding(true); + int failures = 0; + for (var host : nodes) { + Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10)); + if (optionalMutex.isEmpty()) continue; + try (NodeMutex mutex = optionalMutex.get()) { + // Re-check flag while holding lock + host = mutex.node(); + if (!host.status().wantToRebuild()) { + continue; + } + Node updatedNode = hostProvisioner.replaceRootDisk(host); + if (!updatedNode.status().wantToRebuild()) { + nodeRepository().nodes().write(updatedNode, mutex); + } + } catch (RuntimeException e) { + failures++; + log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + interval(), e); + } + } + return this.asSuccessFactor(nodes.size(), failures); + } +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index d6cfeab0cd7..6470e4fdb23 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -79,7 +79,6 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { NodeList nodes = nodeRepository().nodes().list(); resumeProvisioning(nodes); convergeToCapacity(nodes); - replaceRootDisk(nodes); return 1.0; } @@ -152,28 +151,6 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { }); } - /** Replace the root disk of hosts that have requested soft-rebuild */ - private void replaceRootDisk(NodeList nodes) { - NodeList softRebuildingHosts = nodes.rebuilding(true); - for (var host : softRebuildingHosts) { - Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10)); - if (optionalMutex.isEmpty()) return; - try (NodeMutex mutex = optionalMutex.get()) { - // Re-check flag while holding lock - host = mutex.node(); - if (!host.status().wantToRebuild()) { - continue; - } - Node updatedNode = hostProvisioner.replaceRootDisk(host); - if (!updatedNode.status().wantToRebuild()) { - nodeRepository().nodes().write(updatedNode, mutex); - } - } catch (RuntimeException e) { - log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + interval(), e); - } - } - } - /** * Provision hosts to ensure there is room to allocate spare nodes. * diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 7c748b60527..6175531fc65 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -69,11 +69,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent { .map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)) .ifPresent(maintainers::add); provisionServiceProvider.getHostProvisioner() - .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)) - .ifPresent(maintainers::add); - provisionServiceProvider.getHostProvisioner() - .map(hostProvisioner -> new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner)) - .ifPresent(maintainers::add); + .map(hostProvisioner -> List.of( + new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric), + new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner), + new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner))) + .ifPresent(maintainers::addAll); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); } @@ -112,6 +112,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration infrastructureProvisionInterval; private final Duration loadBalancerExpirerInterval; private final Duration dynamicProvisionerInterval; + private final Duration diskReplacerInterval; private final Duration osUpgradeActivatorInterval; private final Duration rebalancerInterval; private final Duration nodeMetricsCollectionInterval; @@ -125,6 +126,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { DefaultTimes(Zone zone, Deployer deployer) { autoscalingInterval = Duration.ofMinutes(5); dynamicProvisionerInterval = Duration.ofMinutes(3); + diskReplacerInterval = Duration.ofMinutes(3); failedExpirerInterval = Duration.ofMinutes(10); failGrace = Duration.ofMinutes(20); infrastructureProvisionInterval = Duration.ofMinutes(3); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index 81740accd78..3aa841ecacf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -143,8 +143,8 @@ public class MockHostProvisioner implements HostProvisioner { return this; } - public MockHostProvisioner completeRebuildOf(Node host) { - rebuildsCompleted.add(host.hostname()); + public MockHostProvisioner completeRebuildOf(String hostname) { + rebuildsCompleted.add(hostname); return this; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacerTest.java new file mode 100644 index 00000000000..ef0c524c48e --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacerTest.java @@ -0,0 +1,42 @@ +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author mpolden + */ +public class DiskReplacerTest { + + private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); + private final MockHostProvisioner hostProvisioner = new MockHostProvisioner(List.of()); + private final DiskReplacer diskReplacer = new DiskReplacer(tester.nodeRepository(), Duration.ofDays(1), new TestMetric(), hostProvisioner); + + @Test + public void rebuild_host() { + tester.makeReadyHosts(2, new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)).activateTenantHosts(); + + // No rebuilds in initial run + diskReplacer.maintain(); + assertEquals(0, tester.nodeRepository().nodes().list().rebuilding(true).size()); + + // Host starts rebuilding + tester.nodeRepository().nodes().rebuild("host-1.yahoo.com", true, Agent.RebuildingOsUpgrader, + tester.nodeRepository().clock().instant()); + diskReplacer.maintain(); + assertEquals(1, tester.nodeRepository().nodes().list().rebuilding(true).size()); + + // Rebuild completes + hostProvisioner.completeRebuildOf("host-1.yahoo.com"); + diskReplacer.maintain(); + assertEquals(0, tester.nodeRepository().nodes().list().rebuilding(true).size()); + } +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index d28d0321e6c..f9c7d7cd88d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -612,30 +612,6 @@ public class DynamicProvisioningMaintainerTest { } } - @Test - public void rebuild_host() { - var tester = new DynamicProvisioningTester(); - Node host1 = tester.addNode("host1", Optional.empty(), NodeType.host, Node.State.active); - Node host11 = tester.addNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.parked, DynamicProvisioningTester.tenantApp); - Node host2 = tester.addNode("host2", Optional.empty(), NodeType.host, Node.State.active); - Node host21 = tester.addNode("host2-1", Optional.of("host2"), NodeType.tenant, Node.State.parked, DynamicProvisioningTester.tenantApp); - - // No rebuilds in initial run - tester.maintainer.maintain(); - assertEquals(0, tester.nodeRepository.nodes().list().rebuilding(true).size()); - - // Host starts rebuilding - tester.nodeRepository.nodes().rebuild(host1.hostname(), true, Agent.RebuildingOsUpgrader, - tester.nodeRepository.clock().instant()); - tester.maintainer.maintain(); - assertEquals(1, tester.nodeRepository.nodes().list().rebuilding(true).size()); - - // Rebuild completes - tester.hostProvisioner.completeRebuildOf(host1); - tester.maintainer.maintain(); - assertEquals(0, tester.nodeRepository.nodes().list().rebuilding(true).size()); - } - private void provisionHostsIn(CloudAccount cloudAccount, int count, DynamicProvisioningTester tester) { tester.maintainer.maintain(); List<String> provisionedHostnames = tester.hostProvisioner.provisionedHosts().stream() |