summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorMorten Tokle <morten.tokle@gmail.com>2018-09-25 14:46:51 +0200
committerGitHub <noreply@github.com>2018-09-25 14:46:51 +0200
commit9b4b78eb4acdaa1cc9f1f3a8ae4996efb3af8824 (patch)
treec207b999387d1dddf31cee7dff814a3bcc2005c8 /controller-server
parent7319c973253ca07ceb530e36688d8b034022cc8a (diff)
parenta39a1984c0ed58bf2cdd7528edef187de5e13be5 (diff)
Merge pull request #7086 from vespa-engine/mpolden/more-details-in-warning
Log number of applications failing metrics update
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java23
1 files changed, 15 insertions, 8 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index 1507cc8fedd..0b56916a0de 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -12,7 +12,6 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics;
import com.yahoo.vespa.hosted.controller.application.RotationStatus;
import com.yahoo.yolean.Exceptions;
-import java.io.UncheckedIOException;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
@@ -20,7 +19,8 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -29,6 +29,7 @@ import java.util.logging.Logger;
* update applications with this info.
*
* @author smorgrav
+ * @author mpolden
*/
public class DeploymentMetricsMaintainer extends Maintainer {
@@ -45,7 +46,8 @@ public class DeploymentMetricsMaintainer extends Maintainer {
@Override
protected void maintain() {
- AtomicBoolean hasWarned = new AtomicBoolean(false);
+ AtomicInteger failures = new AtomicInteger(0);
+ AtomicReference<Exception> lastException = new AtomicReference<>(null);
List<Application> applicationList = ApplicationList.from(applications.asList()).notPullRequest().asList();
// Run parallel stream inside a custom ForkJoinPool so that we can control the number of threads used
@@ -72,17 +74,22 @@ public class DeploymentMetricsMaintainer extends Maintainer {
applications.store(locked.with(deployment.zone(), newMetrics)
.recordActivityAt(controller().clock().instant(), deployment.zone())));
}
- } catch (UncheckedIOException e) {
- if (!hasWarned.getAndSet(true)) {// produce only one warning per maintenance interval
- log.log(Level.WARNING, "Failed to query metrics service: " + Exceptions.toMessageString(e) +
- ". Retrying in " + maintenanceInterval());
- }
+ } catch (Exception e) {
+ failures.incrementAndGet();
+ lastException.set(e);
}
});
});
pool.shutdown();
try {
pool.awaitTermination(30, TimeUnit.MINUTES);
+ if (lastException.get() != null) {
+ log.log(Level.WARNING, String.format("Failed to query metrics service for %d/%d applications. Last error: %s. Retrying in %s",
+ failures.get(),
+ applicationList.size(),
+ Exceptions.toMessageString(lastException.get()),
+ maintenanceInterval()));
+ }
} catch (InterruptedException e) {
throw new RuntimeException(e);
}