diff options
author | Martin Polden <mpolden@mpolden.no> | 2022-02-10 13:02:56 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2022-02-10 13:09:30 +0100 |
commit | 42fe732584876b156fc936f718dabf87af018220 (patch) | |
tree | 0bb74302d7f8b90beeaccf53b9e1173ea5195b38 /routing-generator/src/main/java/com/yahoo/vespa/hosted/routing | |
parent | 0b284f194c0b2dc9b1f8b36b489911f32961d840 (diff) |
Import routing-generator
Diffstat (limited to 'routing-generator/src/main/java/com/yahoo/vespa/hosted/routing')
15 files changed, 1561 insertions, 0 deletions
diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/Router.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/Router.java new file mode 100644 index 00000000000..c7cd5a75359 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/Router.java @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing; + +/** + * A {@link Router} (e.g. a reverse proxy) consumes a {@link RoutingTable} by + * translating it to the router's own format and loading it. + * + * @author mpolden + */ +public interface Router { + + /** Load the given routing table */ + void load(RoutingTable table); + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingGenerator.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingGenerator.java new file mode 100644 index 00000000000..a1d84873379 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingGenerator.java @@ -0,0 +1,166 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing; + +import com.yahoo.cloud.config.LbServicesConfig; +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.annotation.Inject; +import com.yahoo.concurrent.DaemonThreadFactory; +import com.yahoo.config.ConfigInstance; +import com.yahoo.config.subscription.ConfigHandle; +import com.yahoo.config.subscription.ConfigSource; +import com.yahoo.config.subscription.ConfigSourceSet; +import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.jdisc.Metric; +import com.yahoo.routing.config.ZoneConfig; +import com.yahoo.system.ProcessExecuter; +import com.yahoo.vespa.hosted.routing.nginx.Nginx; +import com.yahoo.vespa.hosted.routing.status.RoutingStatus; +import com.yahoo.yolean.Exceptions; +import com.yahoo.yolean.concurrent.Sleeper; + +import java.nio.file.FileSystems; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * The routing generator generates a routing table for a hosted Vespa zone. + * + * Config is retrieved by subscribing to {@link LbServicesConfig} for all deployments. This is then translated to a + * {@link RoutingTable}, which is loaded into a {@link Router}. + * + * @author oyving + * @author mpolden + */ +public class RoutingGenerator extends AbstractComponent { + + private static final Logger log = Logger.getLogger(RoutingGenerator.class.getName()); + private static final Duration configTimeout = Duration.ofSeconds(10); + private static final Duration shutdownTimeout = Duration.ofSeconds(10); + private static final Duration refreshInterval = Duration.ofSeconds(30); + + private final Router router; + private final Clock clock; + @SuppressWarnings("removal") // TODO Vespa 8: remove + private final ConfigSubscriber configSubscriber; + + private final ExecutorService executor = Executors.newSingleThreadExecutor(new DaemonThreadFactory("routing-generator-config-subscriber")); + private final ScheduledExecutorService scheduledExecutor = new ScheduledThreadPoolExecutor(1, new DaemonThreadFactory("routing-generator-maintenance")); + private final Object monitor = new Object(); + + private volatile RoutingTable routingTable = null; + + @Inject + @SuppressWarnings("removal") // TODO Vespa 8: remove + public RoutingGenerator(ZoneConfig zoneConfig, RoutingStatus routingStatus, Metric metric) { + this(new ConfigSourceSet(zoneConfig.configserver()), new Nginx(FileSystems.getDefault(), + new ProcessExecuter(), + Sleeper.DEFAULT, + Clock.systemUTC(), + routingStatus, + metric), + Clock.systemUTC()); + } + + @SuppressWarnings("removal") // TODO Vespa 8: remove + RoutingGenerator(ConfigSource configSource, Router router, Clock clock) { + this.router = Objects.requireNonNull(router); + this.clock = Objects.requireNonNull(clock); + this.configSubscriber = new ConfigSubscriber(configSource); + executor.execute(() -> subscribeOn(LbServicesConfig.class, this::load, configSource, executor)); + // Reload configuration periodically. The router depend on state from other sources than config, such as RoutingStatus + scheduledExecutor.scheduleAtFixedRate(this::reload, refreshInterval.toMillis(), refreshInterval.toMillis(), TimeUnit.MILLISECONDS); + } + + /** Get the currently active routing table, if any */ + public Optional<RoutingTable> routingTable() { + synchronized (monitor) { + return Optional.ofNullable(routingTable); + } + } + + /** Reload the current routing table, if any */ + private void reload() { + synchronized (monitor) { + routingTable().ifPresent(this::load); + } + } + + /** Load the given routing table */ + private void load(RoutingTable newTable) { + synchronized (monitor) { + router.load(newTable); + routingTable = newTable; + } + } + + private void load(LbServicesConfig lbServicesConfig, long generation) { + load(RoutingTable.from(lbServicesConfig, generation)); + } + + @SuppressWarnings("removal") // TODO Vespa 8: remove + private <T extends ConfigInstance> void subscribeOn(Class<T> clazz, BiConsumer<T, Long> action, ConfigSource configSource, + ExecutorService executor) { + ConfigHandle<T> configHandle = null; + String configId = "*"; + while (!executor.isShutdown()) { + try { + boolean initializing = true; + log.log(Level.INFO, "Subscribing to configuration " + clazz + "@" + configId + " from " + configSource); + if (configHandle == null) { + configHandle = configSubscriber.subscribe(clazz, configId); + } + while (!executor.isShutdown() && !configSubscriber.isClosed()) { + Instant subscribingAt = clock.instant(); + if (configSubscriber.nextGeneration(configTimeout.toMillis(), initializing) && configHandle.isChanged()) { + log.log(Level.INFO, "Received new configuration: " + configHandle); + T configuration = configHandle.getConfig(); + log.log(Level.FINE, "Received new configuration: " + configuration); + action.accept(configuration, configSubscriber.getGeneration()); + initializing = false; + } else { + log.log(Level.FINE, "Configuration tick with no change: " + configHandle + + ", getting config took " + Duration.between(subscribingAt, clock.instant()) + + ", timeout is " + configTimeout); + } + } + } catch (Exception e) { + log.log(Level.WARNING, "Exception while subscribing to configuration: " + clazz + "@" + configId + + " from " + configSource + ": " + Exceptions.toMessageString(e)); + } + } + } + + @Override + @SuppressWarnings("removal") // TODO Vespa 8: remove + public void deconstruct() { + configSubscriber.close(); + // shutdownNow because ConfigSubscriber#nextGeneration blocks until next config, and we don't want to wait for + // that when shutting down + executor.shutdownNow(); + scheduledExecutor.shutdown(); + awaitTermination("executor", executor); + awaitTermination("scheduledExecutor", scheduledExecutor); + } + + private static void awaitTermination(String name, ExecutorService executorService) { + try { + if (!executorService.awaitTermination(shutdownTimeout.toMillis(), TimeUnit.MILLISECONDS)) { + throw new RuntimeException("Failed to shut down " + name + " within " + shutdownTimeout); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingTable.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingTable.java new file mode 100644 index 00000000000..c19dd506c87 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/RoutingTable.java @@ -0,0 +1,374 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing; + +import com.google.common.hash.Hashing; +import com.yahoo.cloud.config.LbServicesConfig; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.config.provision.zone.ZoneId; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A routing table for a hosted Vespa zone. This holds the details necessary for the routing layer to route traffic to + * deployments. + * + * This is immutable. + * + * @author mpolden + */ +public class RoutingTable { + + private static final String HOSTED_VESPA_TENANT_NAME = "hosted-vespa"; + + private final Map<Endpoint, Target> table; + private final long generation; + + public RoutingTable(Map<Endpoint, Target> table, long generation) { + this.table = Collections.unmodifiableSortedMap(new TreeMap<>(Objects.requireNonNull(table))); + this.generation = generation; + } + + /** Returns the target for given dnsName, if any */ + public Optional<Target> targetOf(String dnsName) { + return Optional.ofNullable(table.get(new Endpoint(dnsName))); + } + + public Map<Endpoint, Target> asMap() { + return table; + } + + /** Returns the Vespa config generation this is based on */ + public long generation() { + return generation; + } + + @Override + public String toString() { + return table.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RoutingTable that = (RoutingTable) o; + return generation == that.generation && table.equals(that.table); + } + + @Override + public int hashCode() { + return Objects.hash(table, generation); + } + + public static RoutingTable from(LbServicesConfig config, long generation) { + Map<Endpoint, Target> entries = new HashMap<>(); + for (var tenants : config.tenants().entrySet()) { + TenantName tenantName = TenantName.from(tenants.getKey()); + if (tenantName.value().equals(HOSTED_VESPA_TENANT_NAME)) continue; + for (var applications : tenants.getValue().applications().entrySet()) { + String[] parts = applications.getKey().split(":"); + if (parts.length != 4) throw new IllegalArgumentException("Invalid deployment ID '" + applications.getKey() + "'"); + + ApplicationName application = ApplicationName.from(parts[0]); + ZoneId zone = ZoneId.from(parts[1], parts[2]); + InstanceName instance = InstanceName.from(parts[3]); + + for (var configuredEndpoint : applications.getValue().endpoints()) { + List<Real> reals = configuredEndpoint.hosts().stream() + .map(hostname -> new Real(hostname, + 4443, + configuredEndpoint.weight(), + applications.getValue().activeRotation())) + .collect(Collectors.toList()); + Endpoint endpoint = new Endpoint(configuredEndpoint.dnsName()); + ClusterSpec.Id cluster = ClusterSpec.Id.from(configuredEndpoint.clusterId()); + Target target; + boolean applicationEndpoint = configuredEndpoint.scope() == LbServicesConfig.Tenants.Applications.Endpoints.Scope.Enum.application; + if (applicationEndpoint) { + target = Target.create(endpoint.dnsName, tenantName, application, cluster, zone, reals); + } else { + target = Target.create(ApplicationId.from(tenantName, application, instance), cluster, zone, reals); + } + entries.merge(endpoint, target, (oldValue, value) -> { + if (applicationEndpoint) { + List<Real> merged = new ArrayList<>(oldValue.reals()); + merged.addAll(value.reals()); + return value.withReals(merged); + } + return oldValue; + }); + } + } + } + return new RoutingTable(entries, generation); + } + + /** The target of an {@link Endpoint} */ + public static class Target implements Comparable<Target> { + + private final String id; + + private final TenantName tenant; + private final ApplicationName application; + private final Optional<InstanceName> instance; + private final ZoneId zone; + private final ClusterSpec.Id cluster; + private final List<Real> reals; + + private Target(String id, TenantName tenant, ApplicationName application, Optional<InstanceName> instance, + ClusterSpec.Id cluster, ZoneId zone, List<Real> reals) { + this.id = Objects.requireNonNull(id); + this.tenant = Objects.requireNonNull(tenant); + this.application = Objects.requireNonNull(application); + this.instance = Objects.requireNonNull(instance); + this.zone = Objects.requireNonNull(zone); + this.cluster = Objects.requireNonNull(cluster); + this.reals = Objects.requireNonNull(reals).stream().sorted().collect(Collectors.toUnmodifiableList()); + for (int i = 0; i < reals.size(); i++) { + for (int j = 0; j < i; j++) { + if (reals.get(i).equals(reals.get(j))) { + throw new IllegalArgumentException("Found duplicate real server: " + reals.get(i)); + } + } + } + } + + /** An unique identifier of this target (previously known as "upstreamName") */ + public String id() { + return id; + } + + /** Returns whether this is an application-level target, which points to reals of multiple instances */ + public boolean applicationLevel() { + return instance.isEmpty(); + } + + public TenantName tenant() { + return tenant; + } + + public ApplicationName application() { + return application; + } + + public Optional<InstanceName> instance() { + return instance; + } + + public ZoneId zone() { + return zone; + } + + public ClusterSpec.Id cluster() { + return cluster; + } + + /** The real servers this points to */ + public List<Real> reals() { + return reals; + } + + /** Returns whether this is active and should receive traffic either through a global or application endpoint */ + public boolean active() { + return reals.stream().anyMatch(Real::active); + } + + /** Returns a copy of this containing given reals */ + public Target withReals(List<Real> reals) { + return new Target(id, tenant, application, instance, cluster, zone, reals); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Target target = (Target) o; + return id.equals(target.id) && tenant.equals(target.tenant) && application.equals(target.application) && instance.equals(target.instance) && zone.equals(target.zone) && cluster.equals(target.cluster) && reals.equals(target.reals); + } + + @Override + public int hashCode() { + return Objects.hash(id, tenant, application, instance, zone, cluster, reals); + } + + @Override + public String toString() { + return "target " + id + " -> " + + "tenant=" + tenant + + ",application=" + application + + ",instance=" + instance + + ",zone=" + zone + + ",cluster=" + cluster + + ",reals=" + reals; + } + + /** Create an instance-level tartget */ + public static Target create(ApplicationId instance, ClusterSpec.Id cluster, ZoneId zone, List<Real> reals) { + return new Target(createId("", instance.tenant(), instance.application(), Optional.of(instance.instance()), cluster, zone), + instance.tenant(), instance.application(), Optional.of(instance.instance()), cluster, zone, reals); + } + + /** Create an application-level target */ + public static Target create(String dnsName, TenantName tenant, ApplicationName application, ClusterSpec.Id cluster, ZoneId zone, List<Real> reals) { + return new Target(createId(Objects.requireNonNull(dnsName), tenant, application, Optional.empty(), cluster, zone), + tenant, application, Optional.empty(), cluster, zone, reals); + } + + /** Create an unique identifier for given dnsName and target */ + private static String createId(String dnsName, TenantName tenant, ApplicationName application, + Optional<InstanceName> instance, ClusterSpec.Id cluster, ZoneId zone) { + if (instance.isEmpty()) { // Application-scoped endpoint + if (dnsName.isEmpty()) throw new IllegalArgumentException("dnsName must given for application-scoped endpoint"); + String endpointHash = Hashing.sha1().hashString(dnsName, StandardCharsets.UTF_8).toString(); + return "application-" + endpointHash + "." +application.value() + "." + tenant.value(); + } else { + if (!dnsName.isEmpty()) throw new IllegalArgumentException("dnsName must not be given for instance-level endpoint"); + } + return Stream.of(nullIfDefault(cluster.value()), + nullIfDefault(instance.get().value()), + application.value(), + tenant.value(), + zone.region().value(), + zone.environment().value()) + .filter(Objects::nonNull) + .map(Target::sanitize) + .collect(Collectors.joining(".")); + } + + private static String nullIfDefault(String value) { // Sublime sadness + return "default".equals(value) ? null : value; + } + + private static String sanitize(String id) { + return id.toLowerCase() + .replace('_', '-') + .replaceAll("[^a-z0-9-]*", ""); + } + + @Override + public int compareTo(RoutingTable.Target other) { + return id.compareTo(other.id); + } + + } + + /** An externally visible endpoint */ + public static class Endpoint implements Comparable<Endpoint> { + + private final String dnsName; + + public Endpoint(String dnsName) { + this.dnsName = Objects.requireNonNull(dnsName); + } + + /** The DNS name of this endpoint. This does not contain a trailing dot */ + public String dnsName() { + return dnsName; + } + + @Override + public String toString() { + return "endpoint " + dnsName; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Endpoint endpoint = (Endpoint) o; + return dnsName.equals(endpoint.dnsName); + } + + @Override + public int hashCode() { + return Objects.hash(dnsName); + } + + @Override + public int compareTo(RoutingTable.Endpoint o) { + return dnsName.compareTo(o.dnsName); + } + + } + + /** A real server, i.e. a node in a Vespa cluster */ + public static class Real implements Comparable<Real> { + + private static final Comparator<Real> COMPARATOR = Comparator.comparing(Real::hostname) + .thenComparing(Real::port) + .thenComparing(Real::weight) + .thenComparing(Real::active); + + private final String hostname; + private final int port; + private final int weight; + private final boolean active; + + public Real(String hostname, int port, int weight, boolean active) { + this.hostname = Objects.requireNonNull(hostname); + this.port = port; + this.weight = weight; + this.active = active; + } + + /** The hostname of this */ + public String hostname() { + return hostname; + } + + /** The port this is listening on */ + public int port() { + return port; + } + + /** The relative weight of this. Controls the amount of traffic this should receive */ + public int weight() { + return weight; + } + + /** Returns whether this is active and should receive traffic */ + public boolean active() { + return active; + } + + @Override + public String toString() { + return "real server " + hostname + "[port=" + port + ",weight=" + weight + ",active=" + active + "]"; + } + + @Override + public int compareTo(Real other) { + return COMPARATOR.compare(this, other); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Real real = (Real) o; + return port == real.port && hostname.equals(real.hostname); + } + + @Override + public int hashCode() { + return Objects.hash(hostname, port); + } + + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/Nginx.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/Nginx.java new file mode 100644 index 00000000000..f3368c43b92 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/Nginx.java @@ -0,0 +1,188 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.nginx; + +import com.yahoo.collections.Pair; +import com.yahoo.jdisc.Metric; +import com.yahoo.system.ProcessExecuter; +import com.yahoo.vespa.hosted.routing.Router; +import com.yahoo.vespa.hosted.routing.RoutingTable; +import com.yahoo.vespa.hosted.routing.status.RoutingStatus; +import com.yahoo.yolean.Exceptions; +import com.yahoo.yolean.concurrent.Sleeper; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Objects; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * This loads a {@link RoutingTable} into a running Nginx process. + * + * @author mpolden + */ +public class Nginx implements Router { + + private static final Logger LOG = Logger.getLogger(Nginx.class.getName()); + private static final int EXEC_ATTEMPTS = 5; + + static final String GENERATED_UPSTREAMS_METRIC = "upstreams_generated"; + static final String CONFIG_RELOADS_METRIC = "upstreams_nginx_reloads"; + static final String OK_CONFIG_RELOADS_METRIC = "upstreams_nginx_reloads_succeeded"; + + private final FileSystem fileSystem; + private final ProcessExecuter processExecuter; + private final Sleeper sleeper; + private final Clock clock; + private final RoutingStatus routingStatus; + private final Metric metric; + + private final Object monitor = new Object(); + + public Nginx(FileSystem fileSystem, ProcessExecuter processExecuter, Sleeper sleeper, Clock clock, RoutingStatus routingStatus, Metric metric) { + this.fileSystem = Objects.requireNonNull(fileSystem); + this.processExecuter = Objects.requireNonNull(processExecuter); + this.sleeper = Objects.requireNonNull(sleeper); + this.clock = Objects.requireNonNull(clock); + this.routingStatus = Objects.requireNonNull(routingStatus); + this.metric = Objects.requireNonNull(metric); + } + + @Override + public void load(RoutingTable table) { + synchronized (monitor) { + try { + testConfig(table); + loadConfig(table.asMap().size()); + gcConfig(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + + /** Write given routing table to a temporary config file and test it */ + private void testConfig(RoutingTable table) throws IOException { + String config = NginxConfig.from(table, routingStatus); + Files.createDirectories(NginxPath.root.in(fileSystem)); + atomicWriteString(NginxPath.temporaryConfig.in(fileSystem), config); + + // This retries config testing because it can fail due to external factors, such as hostnames not resolving in + // DNS. Retrying can be removed if we switch to having only IP addresses in config + retryingExec("/usr/bin/sudo /opt/vespa/bin/vespa-verify-nginx"); + } + + /** Load tested config into Nginx */ + private void loadConfig(int upstreamCount) throws IOException { + Path configPath = NginxPath.config.in(fileSystem); + Path tempConfigPath = NginxPath.temporaryConfig.in(fileSystem); + try { + String currentConfig = Files.readString(configPath); + String newConfig = Files.readString(tempConfigPath); + if (currentConfig.equals(newConfig)) { + Files.deleteIfExists(tempConfigPath); + return; + } + Path rotatedConfig = NginxPath.config.rotatedIn(fileSystem, clock.instant()); + atomicCopy(configPath, rotatedConfig); + } catch (NoSuchFileException ignored) { + // Fine, not enough files exist to compare or rotate + } + Files.move(tempConfigPath, configPath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + metric.add(CONFIG_RELOADS_METRIC, 1, null); + // Retry reload. Same rationale for retrying as in testConfig() + LOG.info("Loading new configuration file from " + configPath); + retryingExec("/usr/bin/sudo /opt/vespa/bin/vespa-reload-nginx"); + metric.add(OK_CONFIG_RELOADS_METRIC, 1, null); + metric.set(GENERATED_UPSTREAMS_METRIC, upstreamCount, null); + } + + /** Remove old config files */ + private void gcConfig() throws IOException { + Instant oneWeekAgo = clock.instant().minus(Duration.ofDays(7)); + // Rotated files have the format <basename>-yyyy-MM-dd-HH:mm:ss.SSS + String configBasename = NginxPath.config.in(fileSystem).getFileName().toString(); + Files.list(NginxPath.root.in(fileSystem)) + .filter(Files::isRegularFile) + .filter(path -> path.getFileName().getFileName().toString().startsWith(configBasename) || + // TODO(mpolden): This cleans up old layer 7 files. Remove after 2022-03-15 + path.getFileName().getFileName().toString().startsWith("nginx.conf-")) + .filter(path -> rotatedAt(path).map(instant -> instant.isBefore(oneWeekAgo)) + .orElse(false)) + .forEach(path -> Exceptions.uncheck(() -> Files.deleteIfExists(path))); + } + + /** Returns the time given path was rotated */ + private Optional<Instant> rotatedAt(Path path) { + String[] parts = path.getFileName().toString().split("-", 2); + if (parts.length != 2) return Optional.empty(); + return Optional.of(LocalDateTime.from(NginxPath.ROTATED_SUFFIX_FORMAT.parse(parts[1])).toInstant(ZoneOffset.UTC)); + } + + /** Run given command. Retries after a delay on failure */ + private void retryingExec(String command) { + boolean success = false; + for (int attempt = 1; attempt <= EXEC_ATTEMPTS; attempt++) { + String errorMessage; + try { + Pair<Integer, String> result = processExecuter.exec(command); + if (result.getFirst() == 0) { + success = true; + break; + } + errorMessage = result.getSecond(); + } catch (IOException e) { + errorMessage = Exceptions.toMessageString(e); + } + Duration duration = Duration.ofSeconds((long) Math.pow(2, attempt)); + LOG.log(Level.WARNING, "Failed to run " + command + " on attempt " + attempt + ": " + errorMessage + + ". Retrying in " + duration); + sleeper.sleep(duration); + } + if (!success) { + throw new RuntimeException("Failed to run " + command + " successfully after " + EXEC_ATTEMPTS + + " attempts, giving up"); + } + } + + /** Apply pathOperation to a temporary file, then atomically move the temporary file to path */ + private void atomicWrite(Path path, PathOperation pathOperation) throws IOException { + Path tempFile = null; + try { + tempFile = Files.createTempFile(path.getParent(), "nginx", ""); + pathOperation.run(tempFile); + Files.move(tempFile, path, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } finally { + if (tempFile != null) { + Files.deleteIfExists(tempFile); + } + } + } + + private void atomicCopy(Path src, Path dst) throws IOException { + atomicWrite(dst, (tempFile) -> Files.copy(src, tempFile, + StandardCopyOption.REPLACE_EXISTING, + StandardCopyOption.COPY_ATTRIBUTES)); + } + + private void atomicWriteString(Path path, String content) throws IOException { + atomicWrite(path, (tempFile) -> Files.writeString(tempFile, content)); + } + + @FunctionalInterface + private interface PathOperation { + void run(Path path) throws IOException; + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxConfig.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxConfig.java new file mode 100644 index 00000000000..ffaa2b0bb60 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxConfig.java @@ -0,0 +1,116 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.nginx; + +import com.yahoo.vespa.hosted.routing.RoutingTable; +import com.yahoo.vespa.hosted.routing.RoutingTable.Real; +import com.yahoo.vespa.hosted.routing.status.RoutingStatus; + +import java.util.HashMap; +import java.util.Map; + +/** + * Converts a {@link RoutingTable} to Nginx's own config format. + * + * @author mpolden + */ +class NginxConfig { + + private NginxConfig() { + } + + public static String from(RoutingTable routingTable, RoutingStatus routingStatus) { + StringBuilder sb = new StringBuilder(); + + // Map SNI header to upstream + sb.append("map $ssl_preread_server_name $name {\n"); + routingTable.asMap().forEach((endpoint, target) -> { + sb.append(" ").append(endpoint.dnsName()).append(" ").append(target.id()).append(";\n"); + }); + + // Forward requests without SNI header directly to Nginx (e.g. VIP health checks) + sb.append(" '' default;\n"); + sb.append("}\n\n"); + + // Render routing table targets as upstreams + renderUpstreamsTo(sb, routingTable, routingStatus); + + // Configure the default upstream, which targets Nginx itself + sb.append("upstream default {\n"); + sb.append(" server localhost:4445;\n"); + sb.append(" ").append(checkDirective(4080)).append("\n"); + sb.append(" ").append(checkHttpSendDirective("localhost")).append("\n"); + sb.append("}\n\n"); + + // Listener port + sb.append("server {\n"); + sb.append(" listen 443 reuseport;\n"); + sb.append(" listen [::]:443 reuseport;\n"); + sb.append(" proxy_pass $name;\n"); + sb.append(" ssl_preread on;\n"); + sb.append(" proxy_protocol on;\n"); + sb.append("}\n"); + + return sb.toString(); + } + + private static String checkDirective(int port) { + // nginx_http_upstream_check_module does not support health checks over https + // a different http port is used instead, which acts as a http->https proxy for /status.html requests + return String.format("check interval=2000 fall=5 rise=2 timeout=3000 default_down=true type=http port=%d;", + port); + } + + private static String checkHttpSendDirective(String upstreamName) { + return "check_http_send \"" + + "GET /status.html HTTP/1.0\\r\\n" + + "Host: " + upstreamName + "\\r\\n" + + "\\r\\n\";"; + } + + private static void renderUpstreamsTo(StringBuilder sb, RoutingTable routingTable, RoutingStatus routingStatus) { + Map<Real, RoutingTable.Target> realTable = new HashMap<>(); + for (var target : routingTable.asMap().values()) { + if (target.applicationLevel()) continue; + for (var real : target.reals()) { + realTable.put(real, target); + } + } + routingTable.asMap().values().stream().sorted().distinct().forEach(target -> { + sb.append("upstream ").append(target.id()).append(" {").append("\n"); + + // Check if any target is active. + for (var real : target.reals()) { + boolean explicitRoutingActive = true; + // Check external status service if this is an application-level target + if (target.applicationLevel()) { + RoutingTable.Target targetOfReal = realTable.get(real); + explicitRoutingActive = routingStatus.isActive(targetOfReal.id()); + } + String serverParameter = serverParameter(target, real, explicitRoutingActive); + sb.append(" server ").append(real.hostname()).append(":4443").append(serverParameter).append(";\n"); + } + int healthCheckPort = 4082; + sb.append(" ").append(checkDirective(healthCheckPort)).append("\n"); + sb.append(" ").append(checkHttpSendDirective(target.id())).append("\n"); + sb.append(" random two;\n"); + sb.append("}\n\n"); + }); + } + + private static String serverParameter(RoutingTable.Target target, Real real, boolean routingActive) { + // For each real consider: + // * if not an application-level target -> no parameters + // * if active & routingActive = false AND the upstream contains at least one active host -> "down" + // * if weight assigned = 0 -> "backup" + // * if weight assigned > 0 -> "weight=<weight>" + if (!target.applicationLevel()) return ""; + if (!(real.active() && routingActive) && target.active()) return " down"; + int weight = real.weight(); + if (weight == 0) { + return " backup"; + } else { + return " weight=" + weight; + } + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxHealthClient.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxHealthClient.java new file mode 100644 index 00000000000..fdfd0f71e96 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxHealthClient.java @@ -0,0 +1,104 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.nginx; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.annotation.Inject; +import com.yahoo.lang.CachedSupplier; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.hosted.routing.status.HealthStatus; +import com.yahoo.vespa.hosted.routing.status.ServerGroup; +import com.yahoo.yolean.Exceptions; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.util.EntityUtils; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URI; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Client for the Nginx upstream health status page served at /health-status. + * + * @author oyving + * @author mpolden + */ +public class NginxHealthClient extends AbstractComponent implements HealthStatus { + + private static final URI healthStatusUrl = URI.create("http://localhost:4080/health-status/?format=json"); + private static final Duration requestTimeout = Duration.ofSeconds(5); + private static final Duration cacheTtl = Duration.ofSeconds(5); + + private final CloseableHttpClient httpClient; + private final CachedSupplier<ServerGroup> cache = new CachedSupplier<>(this::getStatus, cacheTtl); + + @Inject + public NginxHealthClient() { + this( + HttpClientBuilder.create() + .setDefaultRequestConfig(RequestConfig.custom() + .setConnectTimeout((int) requestTimeout.toMillis()) + .setConnectionRequestTimeout((int) requestTimeout.toMillis()) + .setSocketTimeout((int) requestTimeout.toMillis()) + .build()) + .build() + ); + } + + NginxHealthClient(CloseableHttpClient client) { + this.httpClient = Objects.requireNonNull(client); + } + + @Override + public ServerGroup servers() { + return cache.get(); + } + + private ServerGroup getStatus() { + HttpGet httpGet = new HttpGet(healthStatusUrl); + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + String entity = Exceptions.uncheck(() -> EntityUtils.toString(response.getEntity())); + if (response.getStatusLine().getStatusCode() / 100 != 2) { + throw new IllegalArgumentException("Got status code " + response.getStatusLine().getStatusCode() + + " for URL " + healthStatusUrl + ", with response: " + entity); + } + return parseStatus(entity); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static ServerGroup parseStatus(String json) { + Slime slime = SlimeUtils.jsonToSlime(json); + Cursor root = slime.get(); + List<ServerGroup.Server> servers = new ArrayList<>(); + Cursor serversObject = root.field("servers"); + + Cursor streamArray = serversObject.field("stream"); + Cursor serverArray = serversObject.field("server"); // TODO(mpolden): Remove after 2022-03-01 + Cursor array = streamArray.valid() ? streamArray : serverArray; + + array.traverse((ArrayTraverser) (idx, inspector) -> { + String upstreamName = inspector.field("upstream").asString(); + String hostPort = inspector.field("name").asString(); + boolean up = "up".equals(inspector.field("status").asString()); + servers.add(new ServerGroup.Server(upstreamName, hostPort, up)); + }); + return new ServerGroup(servers); + } + + @Override + public void deconstruct() { + Exceptions.uncheck(httpClient::close); + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxMetricsReporter.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxMetricsReporter.java new file mode 100644 index 00000000000..79381b8c99e --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxMetricsReporter.java @@ -0,0 +1,191 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.nginx; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.cloud.config.ApplicationIdConfig; +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.annotation.Inject; +import com.yahoo.concurrent.DaemonThreadFactory; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.routing.RoutingGenerator; +import com.yahoo.vespa.hosted.routing.RoutingTable; +import com.yahoo.vespa.hosted.routing.status.HealthStatus; +import com.yahoo.vespa.hosted.routing.status.ServerGroup; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Report Nginx metrics periodically. + * + * @author mortent + * @author mpolden + */ +public class NginxMetricsReporter extends AbstractComponent implements Runnable { + + private static final Duration interval = Duration.ofSeconds(20); + + static final String UPSTREAM_UP_METRIC = "nginx.upstreams.up"; + static final String UPSTREAM_DOWN_METRIC = "nginx.upstreams.down"; + static final String UPSTREAM_UNKNOWN_METRIC = "nginx.upstreams.unknown"; + static final String CONFIG_AGE_METRIC = "upstreams_configuration_age"; + + private final Metric metric; + private final HealthStatus healthStatus; + private final ApplicationId routingApplication; + private final FileSystem fileSystem; + private final ScheduledExecutorService service; + private final Supplier<Optional<RoutingTable>> tableSupplier; + + @Inject + public NginxMetricsReporter(ApplicationIdConfig applicationId, Metric metric, HealthStatus healthStatus, RoutingGenerator routingGenerator) { + this(new ApplicationId(applicationId), metric, healthStatus, FileSystems.getDefault(), interval, routingGenerator::routingTable); + } + + NginxMetricsReporter(ApplicationId application, Metric metric, HealthStatus healthStatus, FileSystem fileSystem, Duration interval, + Supplier<Optional<RoutingTable>> tableSupplier) { + this.metric = Objects.requireNonNull(metric); + this.healthStatus = Objects.requireNonNull(healthStatus); + this.routingApplication = Objects.requireNonNull(application); + this.fileSystem = Objects.requireNonNull(fileSystem); + this.tableSupplier = Objects.requireNonNull(tableSupplier); + this.service = Executors.newSingleThreadScheduledExecutor(new DaemonThreadFactory("nginx-metrics-reporter")); + this.service.scheduleAtFixedRate(this, interval.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS); + } + + @Override + public void run() { + Optional<RoutingTable> table = tableSupplier.get(); + table.ifPresent(this::reportHealth); + reportConfigAge(); + } + + private void reportConfigAge() { + Path temporaryNginxConfiguration = NginxPath.temporaryConfig.in(fileSystem); + Path nginxConfiguration = NginxPath.config.in(fileSystem); + Optional<Instant> temporaryConfigModified = lastModified(temporaryNginxConfiguration); + if (temporaryConfigModified.isEmpty()) { + metric.set(CONFIG_AGE_METRIC, 0, metric.createContext(Map.of())); + return; + } + Instant configModified = lastModified(nginxConfiguration).orElse(Instant.EPOCH); + long secondsDiff = Math.abs(Duration.between(configModified, temporaryConfigModified.get()).toSeconds()); + metric.set(CONFIG_AGE_METRIC, secondsDiff, metric.createContext(Map.of())); + } + + private void reportHealth(RoutingTable table) { + Collection<RoutingTable.Target> targets = table.asMap().values(); + Map<String, List<ServerGroup.Server>> status = healthStatus.servers().asMap(); + targets.forEach(service -> { + List<ServerGroup.Server> serversOfUpstream = status.get(service.id()); + if (serversOfUpstream != null) { + reportMetrics(service, serversOfUpstream); + } else { + reportMetricsUnknown(service); + } + }); + + Set<String> knownUpstreams = targets.stream().map(RoutingTable.Target::id).collect(Collectors.toSet()); + long unknownUpstreamCount = status.keySet().stream() + .filter(upstreamName -> !knownUpstreams.contains(upstreamName)) + .count(); + reportMetricsUnknown(unknownUpstreamCount); + } + + // We report a target as unknown if there is no trace of it in the health check yet. This might not be an issue + // (the health check status is a cache), but if it lasts for a long time it might be an error. + private void reportMetricsUnknown(RoutingTable.Target target) { + var dimensions = metricsDimensionsForService(target); + var context = metric.createContext(dimensions); + metric.set(UPSTREAM_UP_METRIC, 0L, context); + metric.set(UPSTREAM_DOWN_METRIC, 0L, context); + metric.set(UPSTREAM_UNKNOWN_METRIC, 1L, context); + } + + // This happens if an application is mentioned in the health check cache, but is not present + // in the routing table. We report this to the routing application, as we don't have anywhere + // else to put the data. + private void reportMetricsUnknown(long count) { + var dimensions = ImmutableMap.of( + "tenantName", routingApplication.tenant().value(), + "app", String.format("%s.%s", routingApplication.application().value(), routingApplication.instance().value()), + "applicationId", routingApplication.toFullString(), + "clusterid", "routing" + ); + var context = metric.createContext(dimensions); + metric.set(UPSTREAM_UNKNOWN_METRIC, count, context); + } + + private void reportMetrics(RoutingTable.Target target, List<ServerGroup.Server> servers) { + long up = countStatus(servers, true); + long down = countStatus(servers, false); + + var dimensions = metricsDimensionsForService(target); + var context = metric.createContext(dimensions); + metric.set(UPSTREAM_UP_METRIC, up, context); + metric.set(UPSTREAM_DOWN_METRIC, down, context); + metric.set(UPSTREAM_UNKNOWN_METRIC, 0L, context); + } + + private Map<String, String> metricsDimensionsForService(RoutingTable.Target target) { + String applicationId = target.tenant().value() + "." + target.application().value(); + String app = target.application().value(); + if (target.instance().isPresent()) { + app += "." + target.instance().get().value(); + applicationId += "." + target.instance().get().value(); + } + return ImmutableMap.of( + "tenantName", target.tenant().value(), + "app", app, + "applicationId", applicationId, + "clusterid", target.cluster().value() + ); + } + + private long countStatus(List<ServerGroup.Server> upstreams, boolean up) { + return upstreams.stream().filter(nginxServer -> up == nginxServer.up()).count(); + } + + private static Optional<Instant> lastModified(Path path) { + try { + return Optional.ofNullable(Files.getLastModifiedTime(path).toInstant()); + } catch (NoSuchFileException e) { + return Optional.empty(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public void deconstruct() { + Duration timeout = Duration.ofSeconds(10); + service.shutdown(); + try { + if (!service.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) { + throw new RuntimeException("Failed to shutdown executor within " + timeout); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxPath.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxPath.java new file mode 100644 index 00000000000..0cde7725260 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/nginx/NginxPath.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.nginx; + +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * File system paths used by Nginx. + * + * @author mpolden + */ +enum NginxPath { + + root("/opt/vespa/var/vespa-hosted/routing", null), + config("nginxl4.conf", root), + temporaryConfig("nginxl4.conf.tmp", root); + + public static final DateTimeFormatter ROTATED_SUFFIX_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd-HH:mm:ss.SSS"); + + private final String path; + + NginxPath(String path, NginxPath parent) { + if (parent == null) { + if (path.endsWith("/")) throw new IllegalArgumentException("Path should not end with '/', got '" + path + "'"); + this.path = path; + } else { + if (path.contains("/")) throw new IllegalArgumentException("Filename should not contain '/', got '" + path + "'"); + this.path = parent.path + "/" + path; + } + } + + /** Returns the path to this, bound to given file system */ + public Path in(FileSystem fileSystem) { + return fileSystem.getPath(path); + } + + /** Returns the rotated path of this with given instant, bound to given file system */ + public Path rotatedIn(FileSystem fileSystem, Instant instant) { + LocalDateTime dateTime = LocalDateTime.ofInstant(instant, ZoneOffset.UTC); + return fileSystem.getPath(path + "-" + ROTATED_SUFFIX_FORMAT.format(dateTime)); + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/AkamaiHandler.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/AkamaiHandler.java new file mode 100644 index 00000000000..e4507edd850 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/AkamaiHandler.java @@ -0,0 +1,108 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.restapi; + +import com.yahoo.component.annotation.Inject; +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.container.jdisc.HttpResponse; +import com.yahoo.container.jdisc.ThreadedHttpRequestHandler; +import com.yahoo.restapi.ErrorResponse; +import com.yahoo.restapi.Path; +import com.yahoo.restapi.SlimeJsonResponse; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.routing.RoutingGenerator; +import com.yahoo.vespa.hosted.routing.RoutingTable; +import com.yahoo.vespa.hosted.routing.status.HealthStatus; +import com.yahoo.vespa.hosted.routing.status.RoutingStatus; + +import java.util.Objects; +import java.util.Optional; +import java.util.function.Supplier; + +/** + * This handler implements the /akamai health check. + * + * The global routing service polls /akamai to determine if a deployment should receive requests via its global + * endpoint. + * + * @author oyving + * @author mpolden + * @author Torbjorn Smorgrav + * @author Wacek Kusnierczyk + */ +public class AkamaiHandler extends ThreadedHttpRequestHandler { + + public static final String + ROTATION_UNKNOWN_MESSAGE = "Rotation not found", + ROTATION_UNAVAILABLE_MESSAGE = "Rotation set unavailable", + ROTATION_UNHEALTHY_MESSAGE = "Rotation unhealthy", + ROTATION_INACTIVE_MESSAGE = "Rotation not available", + ROTATION_OK_MESSAGE = "Rotation OK"; + + private final RoutingStatus routingStatus; + private final HealthStatus healthStatus; + private final Supplier<Optional<RoutingTable>> tableSupplier; + + @Inject + public AkamaiHandler(Context parentCtx, + RoutingGenerator routingGenerator, + RoutingStatus routingStatus, + HealthStatus healthStatus) { + this(parentCtx, routingGenerator::routingTable, routingStatus, healthStatus); + } + + AkamaiHandler(Context parentCtx, + Supplier<Optional<RoutingTable>> tableSupplier, + RoutingStatus routingStatus, + HealthStatus healthStatus) { + super(parentCtx); + this.routingStatus = Objects.requireNonNull(routingStatus); + this.healthStatus = Objects.requireNonNull(healthStatus); + this.tableSupplier = Objects.requireNonNull(tableSupplier); + } + + @Override + public HttpResponse handle(HttpRequest request) { + Path path = new Path(request.getUri()); + if (path.matches("/akamai/v1/status")) { + return status(request); + } + return ErrorResponse.notFoundError("Nothing at " + path); + } + + private HttpResponse status(HttpRequest request) { + String hostHeader = request.getHeader("host"); + String hostname = withoutPort(hostHeader); + Optional<RoutingTable.Target> target = tableSupplier.get().flatMap(table -> table.targetOf(hostname)); + + if (target.isEmpty()) + return response(404, hostHeader, "", ROTATION_UNKNOWN_MESSAGE); + + if (!target.get().active()) + return response(404, hostHeader, "", ROTATION_INACTIVE_MESSAGE); + + String upstreamName = target.get().id(); + + if (!routingStatus.isActive(upstreamName)) + return response(404, hostHeader, upstreamName, ROTATION_UNAVAILABLE_MESSAGE); + + if (!healthStatus.servers().isHealthy(upstreamName)) + return response(502, hostHeader, upstreamName, ROTATION_UNHEALTHY_MESSAGE); + + return response(200, hostHeader, upstreamName, ROTATION_OK_MESSAGE); + } + + private static HttpResponse response(int status, String hostname, String name, String message) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + root.setString("hostname", hostname); + root.setString("upstream", name); + root.setString("message", message); + return new SlimeJsonResponse(status, slime); + } + + private static String withoutPort(String hostHeader) { + return hostHeader.replaceFirst("(:[\\d]+)?$", ""); + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/package-info.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/package-info.java new file mode 100644 index 00000000000..6a1d7f8234e --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/restapi/package-info.java @@ -0,0 +1,8 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @author mpolden + */ +@ExportPackage +package com.yahoo.vespa.hosted.routing.restapi; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/HealthStatus.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/HealthStatus.java new file mode 100644 index 00000000000..e5d50390011 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/HealthStatus.java @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.status; + +/** + * Interface for accessing the health status of servers behind a router/reverse proxy. + * +* @author oyving +*/ +// TODO(mpolden): Make this a part of the future Router interface +public interface HealthStatus { + + /** Returns status of all servers */ + ServerGroup servers(); + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatus.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatus.java new file mode 100644 index 00000000000..9c030aeb100 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatus.java @@ -0,0 +1,14 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.status; + +/** + * Interface for accessing the global routing status of an upstream server. + * +* @author oyving +*/ +public interface RoutingStatus { + + /** Returns whether the given upstream name is active in global routing */ + boolean isActive(String upstreamName); + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatusClient.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatusClient.java new file mode 100644 index 00000000000..a4a37277f5a --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/RoutingStatusClient.java @@ -0,0 +1,138 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.status; + +import com.google.inject.Inject; +import com.yahoo.component.AbstractComponent; +import com.yahoo.lang.CachedSupplier; +import com.yahoo.routing.config.ZoneConfig; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.athenz.api.AthenzService; +import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider; +import com.yahoo.vespa.athenz.tls.AthenzIdentityVerifier; +import com.yahoo.yolean.Exceptions; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.util.EntityUtils; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URI; +import java.time.Duration; +import java.util.Objects; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * Caching client for the /routing/v1/status API on the config server. That API decides if a deployment (or entire zone) + * is explicitly disabled in any global endpoints. + * + * This caches the status for a brief period to avoid drowning config servers with requests from health check pollers. + * + * @author oyving + * @author andreer + * @author mpolden + */ +public class RoutingStatusClient extends AbstractComponent implements RoutingStatus { + + private static final Logger log = Logger.getLogger(RoutingStatusClient.class.getName()); + private static final Duration requestTimeout = Duration.ofSeconds(2); + private static final Duration cacheTtl = Duration.ofSeconds(5); + + private final CloseableHttpClient httpClient; + private final URI configServerVip; + private final CachedSupplier<Status> cache = new CachedSupplier<>(this::status, cacheTtl); + + @Inject + public RoutingStatusClient(ZoneConfig config, ServiceIdentityProvider provider) { + this( + HttpClientBuilder.create() + .setDefaultRequestConfig(RequestConfig.custom() + .setConnectTimeout((int) requestTimeout.toMillis()) + .setConnectionRequestTimeout((int) requestTimeout.toMillis()) + .setSocketTimeout((int) requestTimeout.toMillis()) + .build()) + .setSSLContext(provider.getIdentitySslContext()) + .setSSLHostnameVerifier(createHostnameVerifier(config)) + .setUserAgent("hosted-vespa-routing-status-client") + .build(), + URI.create(config.configserverVipUrl()) + ); + } + + public RoutingStatusClient(CloseableHttpClient httpClient, URI configServerVip) { + this.httpClient = Objects.requireNonNull(httpClient); + this.configServerVip = Objects.requireNonNull(configServerVip); + } + + @Override + public boolean isActive(String upstreamName) { + try { + return cache.get().isActive(upstreamName); + } catch (Exception e) { + log.log(Level.WARNING, "Failed to get status for '" + upstreamName + "'", e); + return true; // Assume IN if cache update fails + } + } + + @Override + public void deconstruct() { + Exceptions.uncheck(httpClient::close); + } + + void invalidateCache() { + cache.invalidate(); + } + + private Status status() { + Set<String> inactiveDeployments = SlimeUtils.entriesStream(get("/routing/v1/status").get()) + .map(Inspector::asString) + .collect(Collectors.toUnmodifiableSet()); + boolean zoneActive = get("/routing/v1/status/zone").get().field("status").asString() + .equalsIgnoreCase("in"); + return new Status(zoneActive, inactiveDeployments); + } + + private Slime get(String path) { + URI url = configServerVip.resolve(path); + HttpGet httpGet = new HttpGet(url); + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + String entity = Exceptions.uncheck(() -> EntityUtils.toString(response.getEntity())); + if (response.getStatusLine().getStatusCode() / 100 != 2) { + throw new IllegalArgumentException("Got status code " + response.getStatusLine().getStatusCode() + + " for URL " + url + ", with response: " + entity); + } + return SlimeUtils.jsonToSlime(entity); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static AthenzIdentityVerifier createHostnameVerifier(ZoneConfig config) { + return new AthenzIdentityVerifier(Set.of(new AthenzService(config.configserverAthenzDomain(), + config.configserverAthenzServiceName()))); + } + + private static class Status { + + private final boolean zoneActive; + private final Set<String> inactiveDeployments; + + public Status(boolean zoneActive, Set<String> inactiveDeployments) { + this.zoneActive = zoneActive; + this.inactiveDeployments = Set.copyOf(Objects.requireNonNull(inactiveDeployments)); + } + + public boolean isActive(String upstreamName) { + return zoneActive && !inactiveDeployments.contains(upstreamName); + } + + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/ServerGroup.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/ServerGroup.java new file mode 100644 index 00000000000..f1a87aa7106 --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/ServerGroup.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.routing.status; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * A group servers behind a router/reverse proxy. + * + * @author mpolden + */ +public class ServerGroup { + + private static final double requiredUpFraction = 0.25D; + + private final Map<String, List<Server>> servers; + + public ServerGroup(List<Server> servers) { + this.servers = servers.stream().collect(Collectors.collectingAndThen(Collectors.groupingBy(Server::upstreamName), + Collections::unmodifiableMap)); + } + + public Map<String, List<Server>> asMap() { + return servers; + } + + /** Returns whether given upstream is healthy */ + public boolean isHealthy(String upstreamName) { + // TODO(mpolden): Look up key directly here once layer 4 config (and thus "-feed" upstreams) are gone + List<Server> upstreamServers = servers.values().stream() + .flatMap(Collection::stream) + .filter(server -> upstreamName.startsWith(server.upstreamName())) + .collect(Collectors.toList()); + long upCount = upstreamServers.stream() + .filter(Server::up) + .count(); + return upCount > upstreamServers.size() * requiredUpFraction; + } + + public static class Server { + + private final String upstreamName; + private final String hostport; + private final boolean up; + + public Server(String upstreamName, String hostport, boolean up) { + this.upstreamName = upstreamName; + this.hostport = hostport; + this.up = up; + } + + public String upstreamName() { + return upstreamName; + } + + public String hostport() { + return hostport; + } + + public boolean up() { + return up; + } + + } + +} diff --git a/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/package-info.java b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/package-info.java new file mode 100644 index 00000000000..2cd9e6a141e --- /dev/null +++ b/routing-generator/src/main/java/com/yahoo/vespa/hosted/routing/status/package-info.java @@ -0,0 +1,8 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @author mpolden + */ +@ExportPackage +package com.yahoo.vespa.hosted.routing.status; + +import com.yahoo.osgi.annotation.ExportPackage; |