diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-05-02 10:53:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-02 10:53:30 +0200 |
commit | 641e413fd94066b789a8ab0149ba0ed3fb7ef31f (patch) | |
tree | 0309d543e830186ab0c30195740dc7beeb789e63 /configserver | |
parent | eba99a09c7f47623b30cda5fd716d7ab7e34e6bc (diff) | |
parent | 8cbbfbc597ab579c12df2f29a92df0d18ab0c256 (diff) |
Merge pull request #26898 from vespa-engine/jonmv/cloud-health-check-in-deployment-job
Verify cloud endpoint health in deployment jobs
Diffstat (limited to 'configserver')
5 files changed, 51 insertions, 12 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 955b1bc8f4f..81de2e06b6c 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -21,7 +21,10 @@ import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.EndpointsChecker; import com.yahoo.config.provision.EndpointsChecker.Availability; +import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider; +import com.yahoo.config.provision.EndpointsChecker.HealthChecker; import com.yahoo.config.provision.EndpointsChecker.Endpoint; +import com.yahoo.config.provision.EndpointsChecker.Status; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostFilter; import com.yahoo.config.provision.InfraDeployer; @@ -172,6 +175,8 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye ConfigserverConfig configserverConfig, Orchestrator orchestrator, TesterClient testerClient, + Zone zone, + HealthCheckerProvider healthCheckers, Metric metric, SecretStore secretStore, FlagSource flagSource) { @@ -180,7 +185,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye infraDeployerProvider.getInfraDeployer(), configConvergenceChecker, httpProxy, - createEndpointsChecker(configserverConfig), + createEndpointsChecker(configserverConfig, zone, healthCheckers.getHealthChecker()), configserverConfig, orchestrator, new LogRetriever(), @@ -1222,28 +1227,36 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye } - private static EndpointsChecker createEndpointsChecker(ConfigserverConfig config) { + private static EndpointsChecker createEndpointsChecker(ConfigserverConfig config, Zone zone, HealthChecker healthChecker) { CloseableHttpClient client = (SystemName.from(config.system()).isPublic() ? DefaultHttpClientBuilder.create(() -> null, "hosted-vespa-convergence-health-checker") : VespaHttpClientBuilder.custom().apacheBuilder().setUserAgent("hosted-vespa-convergence-health-checker")) .setDefaultHeaders(List.of(new BasicHeader(HttpHeaders.CONNECTION, "close"))) .build(); return EndpointsChecker.of(endpoint -> { + Availability health = healthChecker.healthy(endpoint); + if ( health.status() != Status.available // Unhealthy targets is the root cause, so return those details. + || endpoint.isPublic() // Controller checks /status.html on its own. + || endpoint.account().isEnclave(zone)) // Private endpoints in enclave are not reachable by us. + return health; + int remainingFailures = 3; - int remainingSuccesses = 100; + int remainingSuccesses = 10; while (remainingSuccesses > 0 && remainingFailures > 0) { try { if (client.execute(new HttpGet(endpoint.url().withPath(parse("/status.html")).asURI()), response -> response.getCode() == 200)) remainingSuccesses--; - else remainingFailures--; + else + throw new IOException("got non-200 status code"); } catch (Exception e) { log.log(Level.FINE, e, () -> "Failed to check " + endpoint + "status.html: " + e.getMessage()); - remainingFailures--; + if (--remainingFailures == 0) + return new Availability(Status.containersUnhealthy, "Failed to get enough healthy responses from " + endpoint.url()); } } - return remainingSuccesses == 0; + return Availability.ready; }); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java b/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java new file mode 100644 index 00000000000..2d54f256a05 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java @@ -0,0 +1,19 @@ +package com.yahoo.vespa.config.server; + +import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider; +import com.yahoo.container.di.componentgraph.Provider; + +/** + * Default stub for container health checker, overridden by node-repository when that is present. + * + * @author jonmv + */ +public class HealthCheckerProviderProvider implements Provider<HealthCheckerProvider> { + + @Override + public HealthCheckerProvider get() { return new HealthCheckerProvider() { }; } + + @Override + public void deconstruct() { } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java index 62a1704b350..9a6e4632071 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java @@ -10,6 +10,7 @@ import com.yahoo.config.application.api.ApplicationFile; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.EndpointsChecker.Availability; import com.yahoo.config.provision.EndpointsChecker.Endpoint; @@ -113,7 +114,7 @@ public class ApplicationHandler extends HttpHandler { public HttpResponse handlePOST(HttpRequest request) { Path path = new Path(request.getUri()); - if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/verify-endpoints")) return verifyEndpoints(request); + if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/verify-endpoints")) return verifyEndpoints(applicationId(path), request); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/reindex")) return triggerReindexing(applicationId(path), request); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/reindexing")) return enableReindexing(applicationId(path)); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/restart")) return restart(applicationId(path), request); @@ -332,17 +333,19 @@ public class ApplicationHandler extends HttpHandler { return new MessageResponse("Success"); } - private HttpResponse verifyEndpoints(HttpRequest request) { + private HttpResponse verifyEndpoints(ApplicationId applicationId, HttpRequest request) { byte[] data = uncheck(() -> request.getData().readAllBytes()); List<Endpoint> endpoints = new ArrayList<>(); SlimeUtils.jsonToSlime(data).get() .field("endpoints") .traverse((ArrayTraverser) (__, endpointObject) -> { - endpoints.add(new Endpoint(ClusterSpec.Id.from(endpointObject.field("clusterName").asString()), + endpoints.add(new Endpoint(applicationId, + ClusterSpec.Id.from(endpointObject.field("clusterName").asString()), HttpURL.from(URI.create(endpointObject.field("url").asString())), SlimeUtils.optionalString(endpointObject.field("ipAddress")).map(uncheck(InetAddress::getByName)), SlimeUtils.optionalString(endpointObject.field("canonicalName")).map(DomainName::of), - endpointObject.field("public").asBool())); + endpointObject.field("public").asBool(), + CloudAccount.from(endpointObject.field("account").asString()))); }); if (endpoints.isEmpty()) throw new IllegalArgumentException("No endpoints in request " + request); diff --git a/configserver/src/main/resources/configserver-app/services.xml b/configserver/src/main/resources/configserver-app/services.xml index bba7d9627dd..b6904467893 100644 --- a/configserver/src/main/resources/configserver-app/services.xml +++ b/configserver/src/main/resources/configserver-app/services.xml @@ -26,6 +26,7 @@ <component id="com.yahoo.vespa.config.server.tenant.TenantRepository" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.host.HostRegistry" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.ApplicationRepository" bundle="configserver" /> + <component id="com.yahoo.vespa.config.server.HealthCheckerProviderProvider" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.version.VersionState" bundle="configserver" /> <component id="com.yahoo.config.provision.Zone" bundle="config-provisioning" /> <component id="com.yahoo.vespa.config.server.application.ConfigConvergenceChecker" bundle="configserver" /> diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java index e8c4d819c31..306ba6da6f9 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java @@ -10,6 +10,7 @@ import com.yahoo.config.model.api.PortInfo; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.EndpointsChecker; import com.yahoo.config.provision.EndpointsChecker.Availability; @@ -511,11 +512,13 @@ public class ApplicationHandlerTest { @Test public void testVerifyEndpoints() { - expectedEndpoints = List.of(new Endpoint(ClusterSpec.Id.from("bluster"), + expectedEndpoints = List.of(new Endpoint(ApplicationId.defaultId(), + ClusterSpec.Id.from("bluster"), HttpURL.from(URI.create("https://bluster.tld:1234")), Optional.of(uncheck(() -> InetAddress.getByName("4.3.2.1"))), Optional.of(DomainName.of("fluster.tld")), - false)); + false, + CloudAccount.empty)); availability = new Availability(EndpointsChecker.Status.available, "Endpoints are ready"); ApplicationHandler handler = createApplicationHandler(); HttpRequest request = createTestRequest(toUrlPath(applicationId, Zone.defaultZone(), true) + "/verify-endpoints", |