aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src
diff options
context:
space:
mode:
authorArnstein Ressem <aressem@gmail.com>2021-06-04 21:57:32 +0200
committerGitHub <noreply@github.com>2021-06-04 21:57:32 +0200
commitcd1b747b4f65fa3a6ed6aace23235db7591638c5 (patch)
tree810ae8989030055b3cea65c684f69c0cfbd404b5 /node-repository/src
parentb14315a6f51bc8e5bce22e0d9d11d0e730aaf96d (diff)
Revert "Emit a success factor from maintainers"
Diffstat (limited to 'node-repository/src')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java21
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java18
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java10
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java2
21 files changed, 77 insertions, 114 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index 24160c19dfa..9ac1ca2b4c1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -41,9 +41,9 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected final double maintain() {
+ protected final boolean maintain() {
applicationsNeedingMaintenance().forEach(this::deploy);
- return 1.0;
+ return true;
}
/** Returns the number of deployments that are pending execution */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 3914a0c9f07..05eb878e1b1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -44,13 +44,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return 1.0;
+ boolean success = true;
+ if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return success;
activeNodesByApplication().forEach(this::autoscale);
- return 1.0;
+ return success;
}
private void autoscale(ApplicationId application, NodeList applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 0eb2038e233..af32c285d99 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -74,13 +74,13 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
NodeList nodes = nodeRepository().nodes().list();
resumeProvisioning(nodes, lock);
convergeToCapacity(nodes);
}
- return 1.0;
+ return true;
}
/** Resume provisioning of already provisioned hosts and their children */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
index 25108425e6e..2443a12d198 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
@@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired);
if ( ! expired.isEmpty()) {
@@ -49,7 +49,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
metric.add("expired." + fromState, expired.size(), null);
- return 1.0;
+ return true;
}
protected boolean isExpired(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index 7505ce42668..e98da35aa6a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -66,7 +66,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
List<Node> remainingNodes = new ArrayList<>(nodeRepository.nodes().list(Node.State.failed)
.nodeType(NodeType.tenant, NodeType.host)
.asList());
@@ -78,7 +78,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
recycleIf(remainingNodes, node ->
node.allocation().get().membership().cluster().isStateful() &&
node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statefulExpiry)));
- return 1.0;
+ return true;
}
/** Recycle the nodes matching condition, and remove those nodes from the nodes list. */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
index 80f74a011c0..caf20463f60 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
@@ -43,7 +43,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Instant now = nodeRepository().clock().instant();
NodeList allNodes = nodeRepository().nodes().list();
for (var nodeType : NodeType.values()) {
@@ -51,7 +51,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer {
if (upgradingVespa(allNodes, nodeType)) continue;
unencryptedHosts(allNodes, nodeType).forEach(host -> encrypt(host, now));
}
- return 1.0;
+ return true;
}
/** Returns whether any node of given type is currently upgrading its Vespa version */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
index d9f5ea6a7a9..e317333135c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
@@ -39,9 +39,9 @@ public class InfrastructureProvisioner extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
infraDeployer.activateAllSupportedInfraApplications(false);
- return 1.0;
+ return true;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
index ac9d8d6671a..10069fd1a18 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.common.collect.Sets;
import com.yahoo.jdisc.Metric;
-import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
@@ -55,9 +54,9 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
expireReserved();
- return ( removeInactive() + pruneReals() ) / 2;
+ return removeInactive() & pruneReals();
}
/** Move reserved load balancer that have expired to inactive */
@@ -69,8 +68,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
}
/** Deprovision inactive load balancers that have expired */
- private double removeInactive() {
- MutableInteger attempts = new MutableInteger(0);
+ private boolean removeInactive() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
var expiry = nodeRepository().clock().instant().minus(inactiveExpiry);
@@ -78,7 +76,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
lb.changedAt().isBefore(expiry) &&
allocatedNodes(lb.id()).isEmpty(), lb -> {
try {
- attempts.add(1);
log.log(Level.INFO, () -> "Removing expired inactive load balancer " + lb.id());
service.remove(lb.id().application(), lb.id().cluster());
db.removeLoadBalancer(lb.id());
@@ -95,12 +92,11 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
.collect(Collectors.joining(", ")),
interval()));
}
- return asSuccessFactor(attempts.get(), failed.size());
+ return lastException.get() == null;
}
/** Remove reals from inactive load balancers */
- private double pruneReals() {
- var attempts = new MutableInteger(0);
+ private boolean pruneReals() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
patchLoadBalancers(lb -> lb.state() == State.inactive, lb -> {
@@ -111,7 +107,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value()));
if (reals.equals(lb.instance().get().reals())) return; // Nothing to remove
try {
- attempts.add(1);
LOG.log(Level.INFO, () -> "Removing reals from inactive load balancer " + lb.id() + ": " + Sets.difference(lb.instance().get().reals(), reals));
service.create(new LoadBalancerSpec(lb.id().application(), lb.id().cluster(), reals), true);
db.writeLoadBalancer(lb.with(lb.instance().map(instance -> instance.withReals(reals))));
@@ -129,7 +124,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
interval()),
lastException.get());
}
- return asSuccessFactor(attempts.get(), failed.size());
+ return lastException.get() == null;
}
/** Patch load balancers matching given filter, while holding lock */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3990c5099eb..85437b3e78a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -65,7 +65,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
}
@Override
- public double maintain() {
+ public boolean maintain() {
NodeList nodes = nodeRepository().nodes().list();
ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
@@ -80,7 +80,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateRepairTicketMetrics(nodes);
updateAllocationMetrics(nodes);
updateExclusiveSwitchMetrics(nodes);
- return 1.0;
+ return true;
}
private void updateAllocationMetrics(NodeList nodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index f16459ee8b9..effa41dc69f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,21 +72,17 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- int attempts = 0;
- int failures = 0;
int throttledHostFailures = 0;
int throttledNodeFailures = 0;
// Ready nodes
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
- attempts++;
Node node = entry.getKey();
if (throttle(node)) {
- failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -100,12 +96,10 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Active nodes
for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
- attempts++;
Node node = entry.getKey();
if (!failAllowedFor(node.type())) continue;
if (throttle(node)) {
- failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -122,15 +116,11 @@ public class NodeFailer extends NodeRepositoryMaintainer {
if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
Optional<NodeMutex> locked = Optional.empty();
try {
- attempts++;
locked = nodeRepository().nodes().lockAndGet(host);
if (locked.isEmpty()) continue;
nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
"Host should be failed and have no tenant nodes");
}
- catch (Exception e) {
- failures++;
- }
finally {
locked.ifPresent(NodeMutex::close);
}
@@ -140,7 +130,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
- return asSuccessFactor(attempts, failures);
+ return throttlingActive == 0;
}
private Map<Node, String> getReadyNodesByFailureReason() {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
index 37969a30b81..fe2fb5229f9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
@@ -5,7 +5,6 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationLockException;
import com.yahoo.config.provision.HostLivenessTracker;
import com.yahoo.jdisc.Metric;
-import com.yahoo.lang.MutableInteger;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
@@ -49,11 +48,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- return ( updateReadyNodeLivenessEvents() + updateActiveNodeDownState() ) / 2;
+ protected boolean maintain() {
+ updateReadyNodeLivenessEvents();
+ updateActiveNodeDownState();
+ return true;
}
- private double updateReadyNodeLivenessEvents() {
+ private void updateReadyNodeLivenessEvents() {
// Update node last request events through ZooKeeper to collect request to all config servers.
// We do this here ("lazily") to avoid writing to zk for each config request.
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
@@ -68,16 +69,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
}
}
}
- return 1.0;
}
/**
* If the node is down (see {@link #allDown}), and there is no "down" history record, we add it.
* Otherwise we remove any "down" history record.
*/
- private double updateActiveNodeDownState() {
- var attempts = new MutableInteger(0);
- var failures = new MutableInteger(0);
+ private void updateActiveNodeDownState() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName().forEach((hostname, serviceInstances) -> {
Optional<Node> node = activeNodes.node(hostname.toString());
@@ -92,7 +90,6 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
try (var lock = nodeRepository().nodes().lock(owner)) {
node = getNode(hostname.toString(), owner, lock); // Re-get inside lock
if (node.isEmpty()) return; // Node disappeared or changed allocation
- attempts.add(1);
if (isDown) {
recordAsDown(node.get(), lock);
} else {
@@ -101,10 +98,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
} catch (ApplicationLockException e) {
// Fine, carry on with other nodes. We'll try updating this one in the next run
log.log(Level.WARNING, "Could not lock " + owner + ": " + Exceptions.toMessageString(e));
- failures.add(1);
}
});
- return asSuccessFactor(attempts.get(), failures.get());
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
index d671900d08c..1ea4577f7fe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -33,21 +33,19 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- var failures = new MutableInteger(0);
+ protected boolean maintain() {
try {
+ var warnings = new MutableInteger(0);
Set<ApplicationId> applications = activeNodesByApplication().keySet();
- if (applications.isEmpty()) return 1.0;
+ if (applications.isEmpty()) return true;
long pauseMs = interval().toMillis() / applications.size() - 1; // spread requests over interval
int done = 0;
for (ApplicationId application : applications) {
- attempts++;
metricsFetcher.fetchMetrics(application)
.whenComplete((metricsResponse, exception) -> handleResponse(metricsResponse,
exception,
- failures,
+ warnings,
application));
if (++done < applications.size())
Thread.sleep(pauseMs);
@@ -58,22 +56,23 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
nodeRepository().metricsDb().gc();
- return asSuccessFactor(attempts, failures.get());
+ // Suppress failures for manual zones for now to avoid noise
+ return nodeRepository().zone().environment().isManuallyDeployed() || warnings.get() == 0;
}
catch (InterruptedException e) {
- return asSuccessFactor(attempts, failures.get());
+ return false;
}
}
private void handleResponse(MetricsResponse response,
Throwable exception,
- MutableInteger failures,
+ MutableInteger warnings,
ApplicationId application) {
if (exception != null) {
- if (failures.get() < maxWarningsPerInvocation)
+ if (warnings.get() < maxWarningsPerInvocation)
log.log(Level.WARNING, "Could not update metrics for " + application + ": " +
Exceptions.toMessageString(exception));
- failures.add(1);
+ warnings.add(1);
}
else if (response != null) {
nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
index c282fcdb7fc..6ee657beadd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
@@ -37,7 +37,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
// Reboot candidates: Nodes in long-term states, where we know we can safely orchestrate a reboot
List<Node> nodesToReboot = nodeRepository().nodes().list(Node.State.active, Node.State.ready).stream()
.filter(node -> node.type().isHost())
@@ -46,7 +46,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
if (!nodesToReboot.isEmpty())
nodeRepository().nodes().reboot(NodeListFilter.from(nodesToReboot));
- return 1.0;
+ return true;
}
private boolean shouldReboot(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index fe5cd419b31..0a1f6961f9f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -25,7 +25,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval, Metric metric) {
super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(),
- new NodeRepositoryJobMetrics(metric), nodeRepository.database().cluster(), true);
+ jobMetrics(metric), nodeRepository.database().cluster(), true);
this.nodeRepository = nodeRepository;
}
@@ -48,20 +48,10 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
.groupingBy(node -> node.allocation().get().owner());
}
- private static class NodeRepositoryJobMetrics extends JobMetrics {
-
- private final Metric metric;
-
- public NodeRepositoryJobMetrics(Metric metric) {
- this.metric = metric;
- }
-
- @Override
- protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
+ private static JobMetrics jobMetrics(Metric metric) {
+ return new JobMetrics((job, consecutiveFailures) -> {
metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
- metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
- }
-
+ });
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
index 749603a373d..4eba15307cb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
@@ -23,13 +23,13 @@ public class OsUpgradeActivator extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (var nodeType : NodeType.values()) {
if (!nodeType.isHost()) continue;
boolean resume = canUpgradeOsOf(nodeType);
nodeRepository().osVersions().resumeUpgradeOf(nodeType, resume);
}
- return 1.0;
+ return true;
}
/** Returns whether to allow OS upgrade of nodes of given type */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 7bb748c92c9..1543506a78e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -33,18 +33,19 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0; // Rebalancing not necessary
- if (nodeRepository().zone().environment().isTest()) return 1.0; // Short lived deployments; no need to rebalance
+ boolean success = true;
+ if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success; // Rebalancing not necessary
+ if (nodeRepository().zone().environment().isTest()) return success; // Short lived deployments; no need to rebalance
// Work with an unlocked snapshot as this can take a long time and full consistency is not needed
NodeList allNodes = nodeRepository().nodes().list();
updateSkewMetric(allNodes);
- if ( ! zoneIsStable(allNodes)) return 1.0;
+ if ( ! zoneIsStable(allNodes)) return success;
findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
- return 1.0;
+ return success;
}
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 3f5893b368a..f72daf1bc2b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -48,10 +48,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- int successes = 0;
-
+ protected boolean maintain() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
Map<ApplicationId, NodeList> retiredNodesByApplication = activeNodes.retired().groupingBy(node -> node.allocation().get().owner());
for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) {
@@ -60,19 +57,17 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
List<Node> nodesToRemove = retiredNodes.stream().filter(n -> canRemove(n, activeNodes)).collect(Collectors.toList());
if (nodesToRemove.isEmpty()) continue;
- attempts++;
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
if ( ! deployment.isValid()) continue;
nodeRepository().nodes().setRemovable(application, nodesToRemove);
boolean success = deployment.activate().isPresent();
- if ( ! success) continue;
+ if ( ! success) return success;
String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
- successes++;
}
}
- return attempts == 0 ? 1.0 : ((double)successes / attempts);
+ return true;
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index 888f06a5004..c217580872b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -36,16 +36,13 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().zone().environment().isProduction()) return 1.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().zone().environment().isProduction()) return true;
- int attempts = 0;
int successes = 0;
- for (var application : activeNodesByApplication().entrySet()) {
- attempts++;
+ for (var application : activeNodesByApplication().entrySet())
successes += suggest(application.getKey(), application.getValue());
- }
- return attempts == 0 ? 1.0 : ((double)successes / attempts);
+ return successes > 0;
}
private int suggest(ApplicationId application, NodeList applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 0589571e9d8..0307ae13b24 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -66,11 +66,12 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
+ boolean success = true;
// Don't need to maintain spare capacity in dynamically provisioned zones; can provision more on demand.
- if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0;
+ if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success;
NodeList allNodes = nodeRepository().nodes().list();
CapacityChecker capacityChecker = new CapacityChecker(allNodes);
@@ -79,7 +80,6 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
metric.set("overcommittedHosts", overcommittedHosts.size(), null);
retireOvercommitedHosts(allNodes, overcommittedHosts);
- boolean success = true;
Optional<CapacityChecker.HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
if (failurePath.isPresent()) {
int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
@@ -96,7 +96,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
metric.set("spareHostCapacity", spareHostCapacity, null);
}
- return success ? 1.0 : 0.0;
+ return success;
}
private boolean execute(List<Move> mitigation, CapacityChecker.HostFailurePath failurePath) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
index 44890f2f5af..cfab980570d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
@@ -33,14 +33,14 @@ public class SwitchRebalancer extends NodeMover<Move> {
}
@Override
- protected double maintain() {
- if (!nodeRepository().nodes().isWorking()) return 0.0;
- if (!nodeRepository().zone().environment().isProduction()) return 1.0;
+ protected boolean maintain() {
+ if (!nodeRepository().nodes().isWorking()) return false;
+ if (!nodeRepository().zone().environment().isProduction()) return true;
NodeList allNodes = nodeRepository().nodes().list(); // Lockless as strong consistency is not needed
- if (!zoneIsStable(allNodes)) return 1.0;
+ if (!zoneIsStable(allNodes)) return true;
findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository());
- return 1.0;
+ return true;
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
index cfe6e4d348d..5b2f7ce91e8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
@@ -43,7 +43,7 @@ public class NodeMetricsDbMaintainerTest {
fetcher,
Duration.ofHours(1),
new TestMetric());
- assertEquals(maintainer.maintain(), 1.0, 0.0000001);
+ assertTrue(maintainer.maintain());
List<NodeTimeseries> timeseriesList = tester.nodeRepository().metricsDb().getNodeTimeseries(Duration.ofDays(1),
Set.of("host-1.yahoo.com", "host-2.yahoo.com"));
assertEquals(2, timeseriesList.size());