summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java 11
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java 8
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java 20
4 files changed, 34 insertions, 19 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
index c23af021e3b..ffa820bd433 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java
@@ -6,6 +6,8 @@ import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.Slime;
import com.yahoo.vespa.config.SlimeUtils;
+import com.yahoo.yolean.Exceptions;
+import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
@@ -39,7 +41,12 @@ public class ClusterMetricsRetriever {
private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR);
- private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create().build();
+ private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create()
+ .setDefaultRequestConfig(RequestConfig.custom()
+ .setConnectTimeout(10 * 1000)
+ .setSocketTimeout(10 * 1000)
+ .build())
+ .build();
/**
* Call the metrics API on each host and aggregate the metrics
@@ -88,7 +95,7 @@ public class ClusterMetricsRetriever {
return slime;
} catch (IOException e) {
// Usually caused by applications being deleted during metric retrieval
- log.warning("Was unable to fetch metrics from " + hostURI);
+ log.warning("Was unable to fetch metrics from " + hostURI + " : " + Exceptions.toMessageString(e));
return new Slime();
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index b40677a5dc9..69de59e55eb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -347,11 +347,13 @@ public class InternalStepRunner implements StepRunner {
if ( ! endpoints.containsKey(zoneId))
return false;
- for (URI endpoint : endpoints.get(zoneId).values())
- if ( ! controller.jobController().cloud().ready(endpoint)) {
- logger.log("Failed to get 100 consecutive OKs from " + endpoint);
+ for (URI endpoint : endpoints.get(zoneId).values()) {
+ URI workingEndpoint = controller.jobController().withWorkingSchemeAndPort(endpoint, id);
+ if ( ! controller.jobController().cloud().ready(workingEndpoint)) {
+ logger.log("Failed to get 100 consecutive OKs from " + workingEndpoint);
return false;
}
+ }
return true;
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 8ffcfb9c106..765cb465f50 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -447,15 +447,21 @@ public class JobController {
/** Returns a URI of the tester endpoint retrieved from the routing generator, provided it matches an expected form. */
Optional<URI> testerEndpoint(RunId id) {
DeploymentId testerId = new DeploymentId(id.tester().id(), id.type().zone(controller.system()));
- boolean useHttp = controller.system().isPublic()
- && ! directRoutingUseHttps.with(FetchVector.Dimension.APPLICATION_ID, id.tester().id().serializedForm()).value();
return controller.applications().getDeploymentEndpoints(testerId)
.stream().findAny()
.or(() -> controller.applications().routingPolicies().get(testerId).stream()
.findAny()
.map(policy -> policy.endpointIn(controller.system()).url()))
- // TODO jvenstad: Remove ugly thing when public deployments have a valid web certificate.
- .map(uri -> useHttp ? URI.create("http://" + uri.getHost() + ":443/") : uri);
+ .map(url -> withWorkingSchemeAndPort(url, id.tester().id()));
+ }
+
+ // TODO jvenstad: Remove ugly thing when public deployments have a valid web certificate.
+ URI withWorkingSchemeAndPort(URI url, ApplicationId id) {
+ if ( ! controller.system().isPublic()
+ || directRoutingUseHttps.with(FetchVector.Dimension.APPLICATION_ID, id.serializedForm()).value())
+ return url;
+
+ return URI.create("http://" + url.getHost() + ":443/");
}
/** Returns a set containing the zone of the deployment tested in the given run, and all production zones for the application. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index feee2edf896..7c060c599ef 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -46,13 +46,13 @@ public class DeploymentMetricsMaintainer extends Maintainer {
// Run parallel stream inside a custom ForkJoinPool so that we can control the number of threads used
ForkJoinPool pool = new ForkJoinPool(applicationsToUpdateInParallel);
- pool.submit(() -> {
+ pool.submit(() ->
applicationList.parallelStream().forEach(application -> {
- try {
- applications.lockIfPresent(application.id(), locked ->
- applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id()))));
+ applications.lockIfPresent(application.id(), locked ->
+ applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id()))));
- for (Deployment deployment : application.deployments().values()) {
+ for (Deployment deployment : application.deployments().values()) {
+ try {
if (deployment.version().getMajor() < 7) continue;
var collectedMetrics = controller().metrics().getDeploymentMetrics(application.id(), deployment.zone());
var now = controller().clock().instant();
@@ -70,13 +70,13 @@ public class DeploymentMetricsMaintainer extends Maintainer {
.recordActivityAt(now, existingDeployment.zone()));
});
+ } catch (Exception e) {
+ failures.incrementAndGet();
+ lastException.set(e);
}
- } catch (Exception e) {
- failures.incrementAndGet();
- lastException.set(e);
}
- });
- });
+ })
+ );
pool.shutdown();
try {
pool.awaitTermination(30, TimeUnit.MINUTES);