diff options
12 files changed, 105 insertions, 37 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java index d9ced0177e5..c33a575e9b7 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java @@ -19,29 +19,37 @@ import java.util.Optional; */ public interface EndpointsChecker { - record Endpoint(ClusterSpec.Id clusterName, + record Endpoint(ApplicationId applicationId, + ClusterSpec.Id clusterName, HttpURL url, Optional<InetAddress> ipAddress, Optional<DomainName> canonicalName, - boolean isPublic) { } + boolean isPublic, + CloudAccount account) { } /** Status sorted by increasing readiness. */ enum Status { endpointsUnavailable, containersUnhealthy, available } - record Availability(Status status, String message) { } + record Availability(Status status, String message) { + public static final Availability ready = new Availability(Status.available, "Endpoints are ready."); + } interface HostNameResolver { Optional<InetAddress> resolve(DomainName hostName); } interface CNameResolver { Optional<DomainName> resolve(DomainName hostName); } - interface ContainerHealthChecker { boolean healthy(Endpoint endpoint); } + interface HealthChecker { Availability healthy(Endpoint endpoint); } + + interface HealthCheckerProvider { + default HealthChecker getHealthChecker() { return __ -> Availability.ready; } + } - static EndpointsChecker of(ContainerHealthChecker containerHealthChecker) { - return zoneEndpoints -> endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveHostName, EndpointsChecker::resolveCname, containerHealthChecker); + static EndpointsChecker of(HealthChecker healthChecker) { + return zoneEndpoints -> endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveHostName, EndpointsChecker::resolveCname, healthChecker); } - static EndpointsChecker mock(HostNameResolver hostNameResolver, CNameResolver cNameResolver, ContainerHealthChecker containerHealthChecker) { - return zoneEndpoints -> endpointsAvailable(zoneEndpoints, hostNameResolver, cNameResolver, containerHealthChecker); + static EndpointsChecker mock(HostNameResolver hostNameResolver, CNameResolver cNameResolver, HealthChecker healthChecker) { + return zoneEndpoints -> endpointsAvailable(zoneEndpoints, hostNameResolver, cNameResolver, healthChecker); } Availability endpointsAvailable(List<Endpoint> zoneEndpoints); @@ -49,7 +57,7 @@ public interface EndpointsChecker { private static Availability endpointsAvailable(List<Endpoint> zoneEndpoints, HostNameResolver hostNameResolver, CNameResolver cNameResolver, - ContainerHealthChecker containerHealthChecker) { + HealthChecker healthChecker) { if (zoneEndpoints.isEmpty()) return new Availability(Status.endpointsUnavailable, "Endpoints not yet ready."); @@ -89,11 +97,13 @@ public interface EndpointsChecker { } } - for (Endpoint endpoint : zoneEndpoints) - if ( ! containerHealthChecker.healthy(endpoint)) - return new Availability(Status.containersUnhealthy, "Failed to get enough healthy responses from " + endpoint.url()); - - return new Availability(Status.available, "Endpoints are ready"); + Availability availability = Availability.ready; + for (Endpoint endpoint : zoneEndpoints) { + Availability candidate = healthChecker.healthy(endpoint); + if (candidate.status.compareTo(availability.status) < 0) + availability = candidate; + } + return availability; } /** Returns the IP address of the given host name, if any. */ diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 955b1bc8f4f..81de2e06b6c 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -21,7 +21,10 @@ import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.EndpointsChecker; import com.yahoo.config.provision.EndpointsChecker.Availability; +import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider; +import com.yahoo.config.provision.EndpointsChecker.HealthChecker; import com.yahoo.config.provision.EndpointsChecker.Endpoint; +import com.yahoo.config.provision.EndpointsChecker.Status; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostFilter; import com.yahoo.config.provision.InfraDeployer; @@ -172,6 +175,8 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye ConfigserverConfig configserverConfig, Orchestrator orchestrator, TesterClient testerClient, + Zone zone, + HealthCheckerProvider healthCheckers, Metric metric, SecretStore secretStore, FlagSource flagSource) { @@ -180,7 +185,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye infraDeployerProvider.getInfraDeployer(), configConvergenceChecker, httpProxy, - createEndpointsChecker(configserverConfig), + createEndpointsChecker(configserverConfig, zone, healthCheckers.getHealthChecker()), configserverConfig, orchestrator, new LogRetriever(), @@ -1222,28 +1227,36 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye } - private static EndpointsChecker createEndpointsChecker(ConfigserverConfig config) { + private static EndpointsChecker createEndpointsChecker(ConfigserverConfig config, Zone zone, HealthChecker healthChecker) { CloseableHttpClient client = (SystemName.from(config.system()).isPublic() ? DefaultHttpClientBuilder.create(() -> null, "hosted-vespa-convergence-health-checker") : VespaHttpClientBuilder.custom().apacheBuilder().setUserAgent("hosted-vespa-convergence-health-checker")) .setDefaultHeaders(List.of(new BasicHeader(HttpHeaders.CONNECTION, "close"))) .build(); return EndpointsChecker.of(endpoint -> { + Availability health = healthChecker.healthy(endpoint); + if ( health.status() != Status.available // Unhealthy targets is the root cause, so return those details. + || endpoint.isPublic() // Controller checks /status.html on its own. + || endpoint.account().isEnclave(zone)) // Private endpoints in enclave are not reachable by us. + return health; + int remainingFailures = 3; - int remainingSuccesses = 100; + int remainingSuccesses = 10; while (remainingSuccesses > 0 && remainingFailures > 0) { try { if (client.execute(new HttpGet(endpoint.url().withPath(parse("/status.html")).asURI()), response -> response.getCode() == 200)) remainingSuccesses--; - else remainingFailures--; + else + throw new IOException("got non-200 status code"); } catch (Exception e) { log.log(Level.FINE, e, () -> "Failed to check " + endpoint + "status.html: " + e.getMessage()); - remainingFailures--; + if (--remainingFailures == 0) + return new Availability(Status.containersUnhealthy, "Failed to get enough healthy responses from " + endpoint.url()); } } - return remainingSuccesses == 0; + return Availability.ready; }); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java b/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java new file mode 100644 index 00000000000..2d54f256a05 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/HealthCheckerProviderProvider.java @@ -0,0 +1,19 @@ +package com.yahoo.vespa.config.server; + +import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider; +import com.yahoo.container.di.componentgraph.Provider; + +/** + * Default stub for container health checker, overridden by node-repository when that is present. + * + * @author jonmv + */ +public class HealthCheckerProviderProvider implements Provider<HealthCheckerProvider> { + + @Override + public HealthCheckerProvider get() { return new HealthCheckerProvider() { }; } + + @Override + public void deconstruct() { } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java index 62a1704b350..9a6e4632071 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java @@ -10,6 +10,7 @@ import com.yahoo.config.application.api.ApplicationFile; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.EndpointsChecker.Availability; import com.yahoo.config.provision.EndpointsChecker.Endpoint; @@ -113,7 +114,7 @@ public class ApplicationHandler extends HttpHandler { public HttpResponse handlePOST(HttpRequest request) { Path path = new Path(request.getUri()); - if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/verify-endpoints")) return verifyEndpoints(request); + if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/verify-endpoints")) return verifyEndpoints(applicationId(path), request); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/reindex")) return triggerReindexing(applicationId(path), request); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/reindexing")) return enableReindexing(applicationId(path)); if (path.matches("/application/v2/tenant/{tenant}/application/{application}/environment/{ignore}/region/{ignore}/instance/{instance}/restart")) return restart(applicationId(path), request); @@ -332,17 +333,19 @@ public class ApplicationHandler extends HttpHandler { return new MessageResponse("Success"); } - private HttpResponse verifyEndpoints(HttpRequest request) { + private HttpResponse verifyEndpoints(ApplicationId applicationId, HttpRequest request) { byte[] data = uncheck(() -> request.getData().readAllBytes()); List<Endpoint> endpoints = new ArrayList<>(); SlimeUtils.jsonToSlime(data).get() .field("endpoints") .traverse((ArrayTraverser) (__, endpointObject) -> { - endpoints.add(new Endpoint(ClusterSpec.Id.from(endpointObject.field("clusterName").asString()), + endpoints.add(new Endpoint(applicationId, + ClusterSpec.Id.from(endpointObject.field("clusterName").asString()), HttpURL.from(URI.create(endpointObject.field("url").asString())), SlimeUtils.optionalString(endpointObject.field("ipAddress")).map(uncheck(InetAddress::getByName)), SlimeUtils.optionalString(endpointObject.field("canonicalName")).map(DomainName::of), - endpointObject.field("public").asBool())); + endpointObject.field("public").asBool(), + CloudAccount.from(endpointObject.field("account").asString()))); }); if (endpoints.isEmpty()) throw new IllegalArgumentException("No endpoints in request " + request); diff --git a/configserver/src/main/resources/configserver-app/services.xml b/configserver/src/main/resources/configserver-app/services.xml index bba7d9627dd..b6904467893 100644 --- a/configserver/src/main/resources/configserver-app/services.xml +++ b/configserver/src/main/resources/configserver-app/services.xml @@ -26,6 +26,7 @@ <component id="com.yahoo.vespa.config.server.tenant.TenantRepository" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.host.HostRegistry" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.ApplicationRepository" bundle="configserver" /> + <component id="com.yahoo.vespa.config.server.HealthCheckerProviderProvider" bundle="configserver" /> <component id="com.yahoo.vespa.config.server.version.VersionState" bundle="configserver" /> <component id="com.yahoo.config.provision.Zone" bundle="config-provisioning" /> <component id="com.yahoo.vespa.config.server.application.ConfigConvergenceChecker" bundle="configserver" /> diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java index e8c4d819c31..306ba6da6f9 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java @@ -10,6 +10,7 @@ import com.yahoo.config.model.api.PortInfo; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.EndpointsChecker; import com.yahoo.config.provision.EndpointsChecker.Availability; @@ -511,11 +512,13 @@ public class ApplicationHandlerTest { @Test public void testVerifyEndpoints() { - expectedEndpoints = List.of(new Endpoint(ClusterSpec.Id.from("bluster"), + expectedEndpoints = List.of(new Endpoint(ApplicationId.defaultId(), + ClusterSpec.Id.from("bluster"), HttpURL.from(URI.create("https://bluster.tld:1234")), Optional.of(uncheck(() -> InetAddress.getByName("4.3.2.1"))), Optional.of(DomainName.of("fluster.tld")), - false)); + false, + CloudAccount.empty)); availability = new Availability(EndpointsChecker.Status.available, "Endpoints are ready"); ApplicationHandler handler = createApplicationHandler(); HttpRequest request = createTestRequest(toUrlPath(applicationId, Zone.defaultZone(), true) + "/verify-endpoints", diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java index e29e8086c80..c148afb9190 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java @@ -26,7 +26,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.Teste public class MockTesterCloud implements TesterCloud { private final NameService nameService; - private final EndpointsChecker endpointsChecker = EndpointsChecker.mock(this::resolveHostName, this::resolveCname, __ -> true); + private final EndpointsChecker endpointsChecker = EndpointsChecker.mock(this::resolveHostName, this::resolveCname, __ -> Availability.ready); private List<LogEntry> log = new ArrayList<>(); private Status status = NOT_STARTED; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 7aacd93813c..71ab1c4d7da 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -356,7 +356,7 @@ public class InternalStepRunner implements StepRunner { } if (summary.converged()) { controller.jobController().locked(id, lockedRun -> lockedRun.withSummary(null)); - Availability availability = endpointsAvailable(id.application(), id.type().zone(), logger); + Availability availability = endpointsAvailable(id.application(), id.type().zone(), deployment.get(), logger); if (availability.status() == Status.available) { if (controller.routing().policies().processDnsChallenges(new DeploymentId(id.application(), id.type().zone()))) { logger.log("Installation succeeded!"); @@ -496,24 +496,26 @@ public class InternalStepRunner implements StepRunner { } } - private Availability endpointsAvailable(ApplicationId id, ZoneId zone, DualLogger logger) { - DeploymentId deployment = new DeploymentId(id, zone); - Map<ZoneId, List<Endpoint>> endpoints = controller.routing().readTestRunnerEndpointsOf(Set.of(deployment)); + private Availability endpointsAvailable(ApplicationId id, ZoneId zone, Deployment deployment, DualLogger logger) { + DeploymentId deploymentId = new DeploymentId(id, zone); + Map<ZoneId, List<Endpoint>> endpoints = controller.routing().readTestRunnerEndpointsOf(Set.of(deploymentId)); logEndpoints(endpoints, logger); - DeploymentRoutingContext context = controller.routing().of(deployment); + DeploymentRoutingContext context = controller.routing().of(deploymentId); boolean resolveEndpoints = context.routingMethod() == RoutingMethod.exclusive; return controller.serviceRegistry().testerCloud().verifyEndpoints( - deployment, + deploymentId, endpoints.getOrDefault(zone, List.of()) .stream() .map(endpoint -> { ClusterSpec.Id cluster = ClusterSpec.Id.from(endpoint.name()); RoutingPolicy policy = context.routingPolicy(cluster).get(); - return new EndpointsChecker.Endpoint(cluster, + return new EndpointsChecker.Endpoint(id, + cluster, HttpURL.from(endpoint.url()), policy.ipAddress().filter(__ -> resolveEndpoints).map(uncheck(InetAddress::getByName)), policy.canonicalName().filter(__ -> resolveEndpoints), - policy.isPublic()); + policy.isPublic(), + deployment.cloudAccount()); }).toList()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerService.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerService.java index 313cf45e1ee..4f33e079d8f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerService.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerService.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.lb; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.EndpointsChecker.HealthChecker; import com.yahoo.config.provision.NodeType; /** @@ -9,7 +10,7 @@ import com.yahoo.config.provision.NodeType; * * @author mpolden */ -public interface LoadBalancerService { +public interface LoadBalancerService extends HealthChecker { /** * Provisions load balancers from the given specification. Implementations are expected to be idempotent diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerServiceMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerServiceMock.java index 751f3d46059..a79766a577d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerServiceMock.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancerServiceMock.java @@ -4,6 +4,8 @@ package com.yahoo.vespa.hosted.provision.lb; import ai.vespa.http.DomainName; import com.google.common.collect.ImmutableSet; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.EndpointsChecker.Availability; +import com.yahoo.config.provision.EndpointsChecker.Endpoint; import com.yahoo.config.provision.NodeType; import java.util.Collections; @@ -85,4 +87,9 @@ public class LoadBalancerServiceMock implements LoadBalancerService { instances.remove(loadBalancer.id()); } + @Override + public Availability healthy(Endpoint endpoint) { + return Availability.ready; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/SharedLoadBalancerService.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/SharedLoadBalancerService.java index f9f26852b0d..e49d1b302cf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/SharedLoadBalancerService.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/SharedLoadBalancerService.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.lb; import ai.vespa.http.DomainName; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.EndpointsChecker.Availability; +import com.yahoo.config.provision.EndpointsChecker.Endpoint; import com.yahoo.config.provision.NodeType; import java.util.List; @@ -68,4 +70,9 @@ public class SharedLoadBalancerService implements LoadBalancerService { return nodeType == NodeType.tenant && clusterType.isContainer(); } + @Override + public Availability healthy(Endpoint endpoint) { + return Availability.ready; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionServiceProvider.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionServiceProvider.java index 6e301b7724c..65039aaca77 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionServiceProvider.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisionServiceProvider.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.provisioning; +import com.yahoo.config.provision.EndpointsChecker.HealthCheckerProvider; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerService; import java.util.Optional; @@ -10,11 +11,12 @@ import java.util.Optional; * * @author freva */ -public interface ProvisionServiceProvider { +public interface ProvisionServiceProvider extends HealthCheckerProvider { Optional<LoadBalancerService> getLoadBalancerService(); Optional<HostProvisioner> getHostProvisioner(); HostResourcesCalculator getHostResourcesCalculator(); + } |