diff options
4 files changed, 34 insertions, 19 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java index c23af021e3b..ffa820bd433 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterMetricsRetriever.java @@ -6,6 +6,8 @@ import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; +import com.yahoo.yolean.Exceptions; +import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; @@ -39,7 +41,12 @@ public class ClusterMetricsRetriever { private static final List<String> WANTED_METRIC_SERVICES = List.of(VESPA_CONTAINER, VESPA_QRSERVER, VESPA_DISTRIBUTOR); - private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create().build(); + private static final CloseableHttpClient httpClient = VespaHttpClientBuilder.create() + .setDefaultRequestConfig(RequestConfig.custom() + .setConnectTimeout(10 * 1000) + .setSocketTimeout(10 * 1000) + .build()) + .build(); /** * Call the metrics API on each host and aggregate the metrics @@ -88,7 +95,7 @@ public class ClusterMetricsRetriever { return slime; } catch (IOException e) { // Usually caused by applications being deleted during metric retrieval - log.warning("Was unable to fetch metrics from " + hostURI); + log.warning("Was unable to fetch metrics from " + hostURI + " : " + Exceptions.toMessageString(e)); return new Slime(); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index b40677a5dc9..69de59e55eb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -347,11 +347,13 @@ public class InternalStepRunner implements StepRunner { if ( ! endpoints.containsKey(zoneId)) return false; - for (URI endpoint : endpoints.get(zoneId).values()) - if ( ! controller.jobController().cloud().ready(endpoint)) { - logger.log("Failed to get 100 consecutive OKs from " + endpoint); + for (URI endpoint : endpoints.get(zoneId).values()) { + URI workingEndpoint = controller.jobController().withWorkingSchemeAndPort(endpoint, id); + if ( ! controller.jobController().cloud().ready(workingEndpoint)) { + logger.log("Failed to get 100 consecutive OKs from " + workingEndpoint); return false; } + } return true; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 8ffcfb9c106..765cb465f50 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -447,15 +447,21 @@ public class JobController { /** Returns a URI of the tester endpoint retrieved from the routing generator, provided it matches an expected form. */ Optional<URI> testerEndpoint(RunId id) { DeploymentId testerId = new DeploymentId(id.tester().id(), id.type().zone(controller.system())); - boolean useHttp = controller.system().isPublic() - && ! directRoutingUseHttps.with(FetchVector.Dimension.APPLICATION_ID, id.tester().id().serializedForm()).value(); return controller.applications().getDeploymentEndpoints(testerId) .stream().findAny() .or(() -> controller.applications().routingPolicies().get(testerId).stream() .findAny() .map(policy -> policy.endpointIn(controller.system()).url())) - // TODO jvenstad: Remove ugly thing when public deployments have a valid web certificate. - .map(uri -> useHttp ? URI.create("http://" + uri.getHost() + ":443/") : uri); + .map(url -> withWorkingSchemeAndPort(url, id.tester().id())); + } + + // TODO jvenstad: Remove ugly thing when public deployments have a valid web certificate. + URI withWorkingSchemeAndPort(URI url, ApplicationId id) { + if ( ! controller.system().isPublic() + || directRoutingUseHttps.with(FetchVector.Dimension.APPLICATION_ID, id.serializedForm()).value()) + return url; + + return URI.create("http://" + url.getHost() + ":443/"); } /** Returns a set containing the zone of the deployment tested in the given run, and all production zones for the application. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index feee2edf896..7c060c599ef 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -46,13 +46,13 @@ public class DeploymentMetricsMaintainer extends Maintainer { // Run parallel stream inside a custom ForkJoinPool so that we can control the number of threads used ForkJoinPool pool = new ForkJoinPool(applicationsToUpdateInParallel); - pool.submit(() -> { + pool.submit(() -> applicationList.parallelStream().forEach(application -> { - try { - applications.lockIfPresent(application.id(), locked -> - applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id())))); + applications.lockIfPresent(application.id(), locked -> + applications.store(locked.with(controller().metrics().getApplicationMetrics(application.id())))); - for (Deployment deployment : application.deployments().values()) { + for (Deployment deployment : application.deployments().values()) { + try { if (deployment.version().getMajor() < 7) continue; var collectedMetrics = controller().metrics().getDeploymentMetrics(application.id(), deployment.zone()); var now = controller().clock().instant(); @@ -70,13 +70,13 @@ public class DeploymentMetricsMaintainer extends Maintainer { .recordActivityAt(now, existingDeployment.zone())); }); + } catch (Exception e) { + failures.incrementAndGet(); + lastException.set(e); } - } catch (Exception e) { - failures.incrementAndGet(); - lastException.set(e); } - }); - }); + }) + ); pool.shutdown(); try { pool.awaitTermination(30, TimeUnit.MINUTES); |