diff options
Diffstat (limited to 'controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java')
-rw-r--r-- | controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java | 87 |
1 files changed, 61 insertions, 26 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 14f2b38f24a..1c5f98cb7f9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -2,16 +2,13 @@ package com.yahoo.vespa.hosted.controller.deployment; import ai.vespa.http.DomainName; -import ai.vespa.http.HttpURL; +import com.google.common.net.InetAddresses; import com.yahoo.component.Version; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.application.api.Notifications; import com.yahoo.config.application.api.Notifications.When; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.EndpointsChecker; -import com.yahoo.config.provision.EndpointsChecker.Availability; -import com.yahoo.config.provision.EndpointsChecker.Status; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.RoutingMethod; @@ -48,7 +45,6 @@ import com.yahoo.yolean.Exceptions; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UncheckedIOException; -import java.net.InetAddress; import java.security.cert.CertificateExpiredException; import java.security.cert.CertificateNotYetValidException; import java.security.cert.X509Certificate; @@ -91,7 +87,6 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.deployReal; import static com.yahoo.vespa.hosted.controller.deployment.Step.deployTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.installTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.report; -import static com.yahoo.yolean.Exceptions.uncheck; import static java.util.Objects.requireNonNull; import static java.util.logging.Level.FINE; import static java.util.logging.Level.INFO; @@ -356,13 +351,13 @@ public class InternalStepRunner implements StepRunner { } if (summary.converged()) { controller.jobController().locked(id, lockedRun -> lockedRun.withSummary(null)); - Availability availability = endpointsAvailable(id.application(), id.type().zone(), logger); - if (availability.status() == Status.available) { + if (endpointsAvailable(id.application(), id.type().zone(), logger)) { + if (containersAreUp(id.application(), id.type().zone(), logger)) { logger.log("Installation succeeded!"); return Optional.of(running); + } } - logger.log(availability.message()); - if (availability.status() == Status.endpointsUnavailable && timedOut(id, deployment.get(), timeouts.endpoint())) { + else if (timedOut(id, deployment.get(), timeouts.endpoint())) { logger.log(WARNING, "Endpoints failed to show up within " + timeouts.endpoint().toMinutes() + " minutes!"); return Optional.of(error); } @@ -481,6 +476,21 @@ public class InternalStepRunner implements StepRunner { return Optional.empty(); } + /** Returns true iff all calls to endpoint in the deployment give 100 consecutive 200 OK responses on /status.html. */ + private boolean containersAreUp(ApplicationId id, ZoneId zoneId, DualLogger logger) { + var endpoints = controller.routing().readTestRunnerEndpointsOf(Set.of(new DeploymentId(id, zoneId))); + if ( ! endpoints.containsKey(zoneId)) + return false; + + return endpoints.get(zoneId).parallelStream().allMatch(endpoint -> { + boolean ready = controller.jobController().cloud().ready(endpoint.url()); + if (!ready) { + logger.log("Failed to get 100 consecutive OKs from " + endpoint); + } + return ready; + }); + } + /** Returns true iff all containers in the tester deployment give 100 consecutive 200 OK responses on /status.html. */ private boolean testerContainersAreUp(ApplicationId id, ZoneId zoneId, DualLogger logger) { DeploymentId deploymentId = new DeploymentId(id, zoneId); @@ -492,25 +502,50 @@ public class InternalStepRunner implements StepRunner { } } - private Availability endpointsAvailable(ApplicationId id, ZoneId zone, DualLogger logger) { + private boolean endpointsAvailable(ApplicationId id, ZoneId zone, DualLogger logger) { DeploymentId deployment = new DeploymentId(id, zone); Map<ZoneId, List<Endpoint>> endpoints = controller.routing().readTestRunnerEndpointsOf(Set.of(deployment)); + if ( ! endpoints.containsKey(zone)) { + logger.log("Endpoints not yet ready."); + return false; + } + for (var endpoint : endpoints.get(zone)) { + DomainName endpointName = DomainName.of(endpoint.dnsName()); + var ipAddress = controller.jobController().cloud().resolveHostName(endpointName); + if (ipAddress.isEmpty()) { + logger.log(INFO, "DNS lookup yielded no IP address for '" + endpointName + "'."); + return false; + } + DeploymentRoutingContext context = controller.routing().of(deployment); + if (context.routingMethod() == RoutingMethod.exclusive) { + RoutingPolicy policy = context.routingPolicy(ClusterSpec.Id.from(endpoint.name())) + .orElseThrow(() -> new IllegalStateException(endpoint + " has no matching policy")); + if (policy.ipAddress().isPresent()) { + if (ipAddress.equals(policy.ipAddress().map(InetAddresses::forString))) continue; + logger.log(INFO, "IP address of '" + endpointName + "' (" + + ipAddress.map(InetAddresses::toAddrString).get() + ") and load balancer " + + "' (" + policy.ipAddress().orElseThrow() + ") are not equal"); + return false; + } + + var cNameValue = controller.jobController().cloud().resolveCname(endpointName); + if ( ! cNameValue.map(policy.canonicalName().get()::equals).orElse(false)) { + logger.log(INFO, "CNAME '" + endpointName + "' points at " + + cNameValue.map(name -> "'" + name + "'").orElse("nothing") + + " but should point at load balancer '" + policy.canonicalName() + "'"); + return false; + } + var loadBalancerAddress = controller.jobController().cloud().resolveHostName(policy.canonicalName().get()); + if ( ! loadBalancerAddress.equals(ipAddress)) { + logger.log(INFO, "IP address of CNAME '" + endpointName + "' (" + ipAddress.get() + ") and load balancer '" + + policy.canonicalName().get() + "' (" + loadBalancerAddress.orElse(null) + ") are not equal"); + return false; + } + } + } + logEndpoints(endpoints, logger); - DeploymentRoutingContext context = controller.routing().of(deployment); - boolean resolveEndpoints = context.routingMethod() == RoutingMethod.exclusive; - return controller.serviceRegistry().testerCloud().verifyEndpoints( - deployment, - endpoints.getOrDefault(zone, List.of()) - .stream() - .map(endpoint -> { - ClusterSpec.Id cluster = ClusterSpec.Id.from(endpoint.name()); - RoutingPolicy policy = context.routingPolicy(cluster).get(); - return new EndpointsChecker.Endpoint(cluster, - HttpURL.from(endpoint.url()), - policy.ipAddress().filter(__ -> resolveEndpoints).map(uncheck(InetAddress::getByName)), - policy.canonicalName().filter(__ -> resolveEndpoints), - policy.isPublic()); - }).toList()); + return true; } private void logEndpoints(Map<ZoneId, List<Endpoint>> zoneEndpoints, DualLogger logger) { |