about | summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-06-06 11:05:00 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2021-06-06 11:05:00 +0200
commit: 251f60541439d0661c2aec5344c3dcc5b31686a0 (patch)
tree: 20926701f5c05986ff397428a515211cee25d089 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
parent: ec755c18cfe7ef1c2ffbb1f9b78a857746bf9484 (diff)
Revert "Revert "Emit a success factor from maintainers""
This reverts commit cd1b747b4f65fa3a6ed6aace23235db7591638c5.
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java | 21
1 files changed, 11 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
index 1ea4577f7fe..d671900d08c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -33,19 +33,21 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
+ int attempts = 0;
+ var failures = new MutableInteger(0);
try {
- var warnings = new MutableInteger(0);
Set<ApplicationId> applications = activeNodesByApplication().keySet();
- if (applications.isEmpty()) return true;
+ if (applications.isEmpty()) return 1.0;
long pauseMs = interval().toMillis() / applications.size() - 1; // spread requests over interval
int done = 0;
for (ApplicationId application : applications) {
+ attempts++;
metricsFetcher.fetchMetrics(application)
.whenComplete((metricsResponse, exception) -> handleResponse(metricsResponse,
exception,
- warnings,
+ failures,
application));
if (++done < applications.size())
Thread.sleep(pauseMs);
@@ -56,23 +58,22 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
nodeRepository().metricsDb().gc();
- // Suppress failures for manual zones for now to avoid noise
- return nodeRepository().zone().environment().isManuallyDeployed() || warnings.get() == 0;
+ return asSuccessFactor(attempts, failures.get());
}
catch (InterruptedException e) {
- return false;
+ return asSuccessFactor(attempts, failures.get());
}
}
private void handleResponse(MetricsResponse response,
Throwable exception,
- MutableInteger warnings,
+ MutableInteger failures,
ApplicationId application) {
if (exception != null) {
- if (warnings.get() < maxWarningsPerInvocation)
+ if (failures.get() < maxWarningsPerInvocation)
log.log(Level.WARNING, "Could not update metrics for " + application + ": " +
Exceptions.toMessageString(exception));
- warnings.add(1);
+ failures.add(1);
}
else if (response != null) {
nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics());