aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@yahooinc.com> 2022-11-01 15:36:37 +0100
committer Valerij Fredriksen <valerijf@yahooinc.com> 2022-11-02 15:58:06 +0100
commit 6688797036b4239ba58c8774f4c0893ed660bbc9 (patch)
tree 9140fceebfa305b3997e9ddd1bfb232411afb17e /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance
parent 56e467f72da59b3efbfaff574c22e79542f9d17e (diff)
Move disk replacer to separate maintainer
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java 56
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java 23
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 12
3 files changed, 63 insertions, 28 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java
new file mode 100644
index 00000000000..acd5cb61d81
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DiskReplacer.java
@@ -0,0 +1,56 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeMutex;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Rebuilds hosts by replacing the root disk (only supports hosts with remote storage).
+ * Each run takes the nodes listed by rebuilding(true), handles them one by one under the node lock, and returns a success factor.
+ *
+ * @author mpolden
+ */
+public class DiskReplacer extends NodeRepositoryMaintainer {
+
+ private static final Logger log = Logger.getLogger(DiskReplacer.class.getName());
+
+ private final HostProvisioner hostProvisioner;
+
+ DiskReplacer(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) {
+ super(nodeRepository, interval, metric);
+ this.hostProvisioner = hostProvisioner;
+ }
+
+ @Override
+ protected double maintain() {
+ NodeList nodes = nodeRepository().nodes().list().rebuilding(true);
+ int failures = 0;
+ for (var host : nodes) {
+ Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10));
+ if (optionalMutex.isEmpty()) continue; // No mutex obtained: skip this host for now without counting a failure
+ try (NodeMutex mutex = optionalMutex.get()) {
+ // Re-check flag while holding lock: the listing above was read before the lock was taken
+ host = mutex.node();
+ if (!host.status().wantToRebuild()) {
+ continue;
+ }
+ Node updatedNode = hostProvisioner.replaceRootDisk(host);
+ if (!updatedNode.status().wantToRebuild()) { // Write back only when the returned node no longer wants to rebuild
+ nodeRepository().nodes().write(updatedNode, mutex);
+ }
+ } catch (RuntimeException e) { // Count and log the failure, then carry on with the remaining hosts
+ failures++;
+ log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + interval(), e);
+ }
+ }
+ return this.asSuccessFactor(nodes.size(), failures); // Every listed host counts as an attempt, including skipped ones
+ }
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index d6cfeab0cd7..6470e4fdb23 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -79,7 +79,6 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
NodeList nodes = nodeRepository().nodes().list();
resumeProvisioning(nodes);
convergeToCapacity(nodes);
- replaceRootDisk(nodes);
return 1.0;
}
@@ -152,28 +151,6 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
});
}
- /** Replace the root disk of hosts that have requested soft-rebuild */
- private void replaceRootDisk(NodeList nodes) {
- NodeList softRebuildingHosts = nodes.rebuilding(true);
- for (var host : softRebuildingHosts) {
- Optional<NodeMutex> optionalMutex = nodeRepository().nodes().lockAndGet(host, Duration.ofSeconds(10));
- if (optionalMutex.isEmpty()) return;
- try (NodeMutex mutex = optionalMutex.get()) {
- // Re-check flag while holding lock
- host = mutex.node();
- if (!host.status().wantToRebuild()) {
- continue;
- }
- Node updatedNode = hostProvisioner.replaceRootDisk(host);
- if (!updatedNode.status().wantToRebuild()) {
- nodeRepository().nodes().write(updatedNode, mutex);
- }
- } catch (RuntimeException e) {
- log.log(Level.WARNING, "Failed to rebuild " + host.hostname() + ", will retry in " + interval(), e);
- }
- }
- }
-
/**
* Provision hosts to ensure there is room to allocate spare nodes.
*
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 7c748b60527..6175531fc65 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -69,11 +69,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
.map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric))
.ifPresent(maintainers::add);
provisionServiceProvider.getHostProvisioner()
- .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric))
- .ifPresent(maintainers::add);
- provisionServiceProvider.getHostProvisioner()
- .map(hostProvisioner -> new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner))
- .ifPresent(maintainers::add);
+ .map(hostProvisioner -> List.of(
+ new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric),
+ new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner),
+ new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner)))
+ .ifPresent(maintainers::addAll);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
}
@@ -112,6 +112,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration infrastructureProvisionInterval;
private final Duration loadBalancerExpirerInterval;
private final Duration dynamicProvisionerInterval;
+ private final Duration diskReplacerInterval;
private final Duration osUpgradeActivatorInterval;
private final Duration rebalancerInterval;
private final Duration nodeMetricsCollectionInterval;
@@ -125,6 +126,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
DefaultTimes(Zone zone, Deployer deployer) {
autoscalingInterval = Duration.ofMinutes(5);
dynamicProvisionerInterval = Duration.ofMinutes(3);
+ diskReplacerInterval = Duration.ofMinutes(3);
failedExpirerInterval = Duration.ofMinutes(10);
failGrace = Duration.ofMinutes(20);
infrastructureProvisionInterval = Duration.ofMinutes(3);