summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@verizonmedia.com> 2019-09-18 15:46:24 +0200
committer GitHub <noreply@github.com> 2019-09-18 15:46:24 +0200
commit 48272e7af2589989361a4bdc13f57fddc7f3a09e (patch)
tree 9b42951db85ad16155da048f254ea3591fb2084f
parent 9f2977281ae8d719f20f3d69951806ca92b34fab (diff)
parent d7e9109f6ab6a147efcbfc99093c48cdad8d0e0a (diff)
Merge pull request #10701 from vespa-engine/olaa/more-fault-tolerant-metric-retrieval
Set request timeout
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java 11
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java 20
2 files changed, 19 insertions, 12 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
index c23af021e3b..ffa820bd433 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
@@ -6,6 +6,8 @@ import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.Slime;
import com.yahoo.vespa.config.SlimeUtils;
+import com.yahoo.yolean.Exceptions;
+import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
@@ -39,7 +41,12 @@ public class ClusterMetricsRetriever {
private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR);
- private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create().build();
+ private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create()
+ .setDefaultRequestConfig(RequestConfig.custom()
+ .setConnectTimeout(10 * 1000)
+ .setSocketTimeout(10 * 1000)
+ .build())
+ .build();
/**
* Call the metrics API on each host and aggregate the metrics
@@ -88,7 +95,7 @@ public class ClusterMetricsRetriever {
return slime;
} catch (IOException e) {
// Usually caused by applications being deleted during metric retrieval
- log.warning("Was unable to fetch metrics from " + hostURI);
+ log.warning("Was unable to fetch metrics from " + hostURI + " : " + Exceptions.toMessageString(e));
return new Slime();
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index feee2edf896..7c060c599ef 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -46,13 +46,13 @@ public class DeploymentMetricsMaintainer extends Maintainer {
// Run parallel stream inside a custom ForkJoinPool so that we can control the number of threads used
ForkJoinPool pool = new ForkJoinPool(applicationsToUpdateInParallel);
- pool.submit(() -> {
+ pool.submit(() ->
applicationList.parallelStream().forEach(application -> {
- try {
- applications.lockIfPresent(application.id(), locked ->
- applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id()))));
+ applications.lockIfPresent(application.id(), locked ->
+ applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id()))));
- for (Deployment deployment : application.deployments().values()) {
+ for (Deployment deployment : application.deployments().values()) {
+ try {
if (deployment.version().getMajor() < 7) continue;
var collectedMetrics = controller().metrics().getDeploymentMetrics(application.id(), deployment.zone());
var now = controller().clock().instant();
@@ -70,13 +70,13 @@ public class DeploymentMetricsMaintainer extends Maintainer {
.recordActivityAt(now, existingDeployment.zone()));
});
+ } catch (Exception e) {
+ failures.incrementAndGet();
+ lastException.set(e);
}
- } catch (Exception e) {
- failures.incrementAndGet();
- lastException.set(e);
}
- });
- });
+ })
+ );
pool.shutdown();
try {
pool.awaitTermination(30, TimeUnit.MINUTES);