diff options
Diffstat (limited to 'config-provisioning/src/main')
5 files changed, 297 insertions, 51 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java new file mode 100644 index 00000000000..818a448187c --- /dev/null +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java @@ -0,0 +1,177 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.config.provision; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.NodeResources.DiskSpeed; + +import java.util.Map; +import java.util.TreeMap; + +import static com.yahoo.config.provision.NodeResources.Architecture; +import static java.util.Objects.requireNonNull; + +/** + * Defines the policies for assigning cluster capacity in various environments. + * + * @author bratseth + */ +public class CapacityPolicies { + + private final Zone zone; + private final Exclusivity exclusivity; + private final ApplicationId applicationId; + private final Architecture adminClusterArchitecture; + + public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Architecture adminClusterArchitecture) { + this.zone = zone; + this.exclusivity = exclusivity; + this.applicationId = applicationId; + this.adminClusterArchitecture = adminClusterArchitecture; + } + + public Capacity applyOn(Capacity capacity, boolean exclusive) { + var min = applyOn(capacity.minResources(), capacity, exclusive); + var max = applyOn(capacity.maxResources(), capacity, exclusive); + var groupSize = capacity.groupSize().fromAtMost(max.nodes() / min.groups()) + .toAtLeast(min.nodes() / max.groups()); + return capacity.withLimits(min, max, groupSize); + } + + private ClusterResources applyOn(ClusterResources resources, Capacity capacity, boolean exclusive) { + int nodes = decideCount(resources.nodes(), capacity.isRequired(), applicationId.instance().isTester()); + int groups = 
decideGroups(resources.nodes(), resources.groups(), nodes); + var nodeResources = decideNodeResources(resources.nodeResources(), capacity.isRequired(), exclusive); + return new ClusterResources(nodes, groups, nodeResources); + } + + private int decideCount(int requested, boolean required, boolean isTester) { + if (isTester) return 1; + + if (required) return requested; + return switch (zone.environment()) { + case dev, test -> 1; + case perf -> Math.min(requested, 3); + case staging -> requested <= 1 ? requested : Math.max(2, requested / 10); + case prod -> requested; + }; + } + + private int decideGroups(int requestedNodes, int requestedGroups, int decidedNodes) { + if (requestedNodes == decidedNodes) return requestedGroups; + int groups = Math.min(requestedGroups, decidedNodes); // cannot have more groups than nodes + while (groups > 1 && decidedNodes % groups != 0) + groups--; // Must be divisible by the number of groups + return groups; + } + + private NodeResources decideNodeResources(NodeResources target, boolean required, boolean exclusive) { + if (required || exclusive) return target; // Cannot downsize if resources are required, or exclusively allocated + if (target.isUnspecified()) return target; // Cannot be modified + + if (zone.environment() == Environment.dev && zone.cloud().allowHostSharing()) { + // Dev does not cap the cpu or network of containers since usage is spotty: Allocate just a small amount exclusively + target = target.withVcpu(0.1).withBandwidthGbps(0.1); + + // Allocate without GPU in dev + target = target.with(NodeResources.GpuResources.zero()); + } + + // Allow slow storage in zones which are not performance sensitive + if (zone.system().isCd() || zone.environment() == Environment.dev || zone.environment() == Environment.test) + target = target.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any).withBandwidthGbps(0.1); + + return target; + } + + public ClusterResources specifyFully(ClusterResources resources, 
ClusterSpec clusterSpec) { + return resources.with(specifyFully(resources.nodeResources(), clusterSpec)); + } + + public NodeResources specifyFully(NodeResources resources, ClusterSpec clusterSpec) { + return resources.withUnspecifiedFieldsFrom(defaultResources(clusterSpec).with(DiskSpeed.any)); + } + + private NodeResources defaultResources(ClusterSpec clusterSpec) { + if (clusterSpec.type() == ClusterSpec.Type.admin) { + if (exclusivity.allocation(clusterSpec)) { + return smallestExclusiveResources().with(adminClusterArchitecture); + } + + if (clusterSpec.id().value().equals("cluster-controllers")) { + return clusterControllerResources(clusterSpec, adminClusterArchitecture).with(adminClusterArchitecture); + } + + if (clusterSpec.id().value().equals("logserver")) { + return logserverResources(adminClusterArchitecture).with(adminClusterArchitecture); + } + + return versioned(clusterSpec, Map.of(new Version(0), smallestSharedResources())).with(adminClusterArchitecture); + } + + if (clusterSpec.type() == ClusterSpec.Type.content) { + // When changing defaults here update cloud.vespa.ai/en/reference/services + return zone.cloud().dynamicProvisioning() + ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2, 16, 300, 0.3))) + : versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 8, 50, 0.3))); + } + else { + // When changing defaults here update cloud.vespa.ai/en/reference/services + return zone.cloud().dynamicProvisioning() + ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2.0, 8, 50, 0.3))) + : versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 8, 50, 0.3))); + } + } + + private NodeResources clusterControllerResources(ClusterSpec clusterSpec, Architecture architecture) { + // 1.32 fits floor(8/1.32) = 6 cluster controllers on each 8Gb host, and each will have + // 1.32-(0.7+0.6)*(1.32/8) = 1.1 Gb real memory given current taxes. 
+ if (architecture == Architecture.x86_64) + return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(0.25, 1.32, 10, 0.3))); + else + // arm64 nodes need more memory + return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(0.25, 1.50, 10, 0.3))); + } + + private NodeResources logserverResources(Architecture architecture) { + if (zone.cloud().name() == CloudName.AZURE) + return new NodeResources(2, 4, 50, 0.3); + + if (zone.cloud().name() == CloudName.GCP) + return new NodeResources(1, 4, 50, 0.3); + + return architecture == Architecture.arm64 + ? new NodeResources(0.5, 2.5, 50, 0.3) + : new NodeResources(0.5, 2, 50, 0.3); + } + + // The lowest amount of resources that can be exclusively allocated (i.e. a matching host flavor for this exists) + private NodeResources smallestExclusiveResources() { + return zone.cloud().name() == CloudName.AZURE || zone.cloud().name() == CloudName.GCP + ? new NodeResources(2, 8, 50, 0.3) + : new NodeResources(0.5, 8, 50, 0.3); + } + + // The lowest amount of resources that can be shared (i.e. a matching host flavor for this exists) + private NodeResources smallestSharedResources() { + return zone.cloud().name() == CloudName.GCP + ? new NodeResources(1, 4, 50, 0.3) + : new NodeResources(0.5, 2, 50, 0.3); + } + + /** Returns the requested cluster spec with its exclusivity decided: always exclusive when the capacity has a cloud account, otherwise exclusive only if that was requested and the capacity is required or the environment is prod */ + public ClusterSpec decideExclusivity(Capacity capacity, ClusterSpec requestedCluster) { + if (capacity.cloudAccount().isPresent()) return requestedCluster.withExclusivity(true); // Implicit exclusive + boolean exclusive = requestedCluster.isExclusive() && (capacity.isRequired() || zone.environment() == Environment.prod); + return requestedCluster.withExclusivity(exclusive); + } + + /** + * Returns the resources for the newest version not newer than that requested in the cluster spec. 
+ */ + private static NodeResources versioned(ClusterSpec spec, Map<Version, NodeResources> resources) { + return requireNonNull(new TreeMap<>(resources).floorEntry(spec.vespaVersion()), + "no default resources applicable for " + spec + " among: " + resources) + .getValue(); + } + +} diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java index a8674d220d1..d7325a5fe92 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java @@ -4,16 +4,20 @@ package com.yahoo.config.provision; import ai.vespa.http.DomainName; import ai.vespa.http.HttpURL; +import javax.naming.NameNotFoundException; import javax.naming.NamingException; import javax.naming.directory.Attribute; import javax.naming.directory.Attributes; +import javax.naming.directory.DirContext; import javax.naming.directory.InitialDirContext; import java.net.InetAddress; -import java.net.UnknownHostException; +import java.util.ArrayList; import java.util.Collections; -import java.util.Enumeration; +import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Optional; +import java.util.Set; /** * @author jonmv @@ -35,9 +39,7 @@ public interface EndpointsChecker { public static final Availability ready = new Availability(Status.available, "Endpoints are ready."); } - interface HostNameResolver { Optional<InetAddress> resolve(DomainName hostName); } - - interface CNameResolver { Optional<DomainName> resolve(DomainName hostName); } + interface NameResolver { List<String> resolve(NameType nameType, DomainName name); } interface HealthChecker { Availability healthy(Endpoint endpoint); } @@ -46,55 +48,54 @@ public interface EndpointsChecker { } static EndpointsChecker of(HealthChecker healthChecker) { - return zoneEndpoints -> 
endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveHostName, EndpointsChecker::resolveCname, healthChecker); + return zoneEndpoints -> endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveAll, healthChecker); } - static EndpointsChecker mock(HostNameResolver hostNameResolver, CNameResolver cNameResolver, HealthChecker healthChecker) { - return zoneEndpoints -> endpointsAvailable(zoneEndpoints, hostNameResolver, cNameResolver, healthChecker); + static EndpointsChecker mock(NameResolver resolver, HealthChecker healthChecker) { + return zoneEndpoints -> endpointsAvailable(zoneEndpoints, resolver, healthChecker); } Availability endpointsAvailable(List<Endpoint> zoneEndpoints); private static Availability endpointsAvailable(List<Endpoint> zoneEndpoints, - HostNameResolver hostNameResolver, - CNameResolver cNameResolver, + NameResolver nameResolver, HealthChecker healthChecker) { if (zoneEndpoints.isEmpty()) return new Availability(Status.endpointsUnavailable, "Endpoints not yet ready."); for (Endpoint endpoint : zoneEndpoints) { - Optional<InetAddress> resolvedIpAddress = hostNameResolver.resolve(endpoint.url().domain()); - if (resolvedIpAddress.isEmpty()) + Set<String> resolvedIpAddresses = resolveIpAddresses(endpoint.url().domain(), nameResolver); + if (resolvedIpAddresses.isEmpty()) return new Availability(Status.endpointsUnavailable, "DNS lookup yielded no IP address for '" + endpoint.url().domain() + "'."); - if (resolvedIpAddress.equals(endpoint.ipAddress())) // We expect a certain IP address, and that's what we got, so we're good. - continue; - - if (endpoint.ipAddress().isPresent()) // We expect a certain IP address, but that's not what we got. 
+ if (endpoint.ipAddress().isPresent()) { + if (resolvedIpAddresses.contains(endpoint.ipAddress().get().getHostAddress())) { + continue; // Resolved addresses contain the expected endpoint IP address + } return new Availability(Status.endpointsUnavailable, - "IP address of '" + endpoint.url().domain() + "' (" + - resolvedIpAddress.get().getHostAddress() + ") and load balancer " + - "' (" + endpoint.ipAddress().get().getHostAddress() + ") are not equal"); + "IP address(es) of '" + endpoint.url().domain() + "' (" + + resolvedIpAddresses + ") do not include load balancer IP " + + "' (" + endpoint.ipAddress().get().getHostAddress() + ")"); + } if (endpoint.canonicalName().isEmpty()) // We have no expected IP address, and no canonical name, so there's nothing more to check. continue; - Optional<DomainName> cNameValue = cNameResolver.resolve(endpoint.url().domain()); - if (cNameValue.filter(endpoint.canonicalName().get()::equals).isEmpty()) { + List<String> cnameAnswers = nameResolver.resolve(NameType.CNAME, endpoint.url().domain()); + if (!cnameAnswers.contains(endpoint.canonicalName().get().value())) { return new Availability(Status.endpointsUnavailable, "CNAME '" + endpoint.url().domain() + "' points at " + - cNameValue.map(name -> "'" + name + "'").orElse("nothing") + + cnameAnswers + " but should point at load balancer " + endpoint.canonicalName().map(name -> "'" + name + "'").orElse("nothing")); } - Optional<InetAddress> loadBalancerAddress = hostNameResolver.resolve(endpoint.canonicalName().get()); - if ( ! loadBalancerAddress.equals(resolvedIpAddress)) { + Set<String> loadBalancerAddresses = resolveIpAddresses(endpoint.canonicalName().get(), nameResolver); + if ( ! 
loadBalancerAddresses.equals(resolvedIpAddresses)) { return new Availability(Status.endpointsUnavailable, - "IP address of CNAME '" + endpoint.url().domain() + "' (" + - resolvedIpAddress.get().getHostAddress() + ") and load balancer '" + - endpoint.canonicalName().get() + "' (" + - loadBalancerAddress.map(InetAddress::getHostAddress).orElse("empty") + ") are not equal"); + "IP address(es) of CNAME '" + endpoint.url().domain() + "' (" + + resolvedIpAddresses + ") and load balancer '" + + endpoint.canonicalName().get() + "' (" + loadBalancerAddresses + ") are not equal"); } } @@ -107,38 +108,43 @@ public interface EndpointsChecker { return availability; } - /** Returns the IP address of the given host name, if any. */ - private static Optional<InetAddress> resolveHostName(DomainName hostname) { - try { - return Optional.of(InetAddress.getByName(hostname.value())); - } - catch (UnknownHostException ignored) { - return Optional.empty(); - } + private static Set<String> resolveIpAddresses(DomainName name, NameResolver nameResolver) { + Set<String> answers = new HashSet<>(); + answers.addAll(nameResolver.resolve(NameType.A, name)); + answers.addAll(nameResolver.resolve(NameType.AAAA, name)); + return answers; + } + + enum NameType { + A, AAAA, CNAME } - /** Returns the host name of the given CNAME, if any. */ - private static Optional<DomainName> resolveCname(DomainName endpoint) { + /** Returns all answers for given type and name. 
An empty list is returned if the name does not exist (NXDOMAIN), or if the lookup yields no attribute of the given type */ + private static List<String> resolveAll(NameType type, DomainName name) { try { - InitialDirContext ctx = new InitialDirContext(); + DirContext ctx = new InitialDirContext(); try { - Attributes attrs = ctx.getAttributes("dns:/" + endpoint.value(), new String[]{ "CNAME" }); - for (Attribute attribute : Collections.list(attrs.getAll())) { - Enumeration<?> vals = attribute.getAll(); - if (vals.hasMoreElements()) { - String hostname = vals.nextElement().toString(); - return Optional.of(hostname.substring(0, hostname.length() - 1)).map(DomainName::of); - } + String entryType = type.name(); + Attributes attributes = ctx.getAttributes("dns:/" + name, new String[]{entryType}); + Attribute attribute = attributes.get(entryType); + if (attribute == null) { + return List.of(); } - } - finally { + List<String> results = new ArrayList<>(); + attribute.getAll().asIterator().forEachRemaining(value -> { + String answer = Objects.toString(value); + answer = answer.endsWith(".") ? answer.substring(0, answer.length() - 1) : answer; // Trim trailing dot + results.add(answer); + }); + return Collections.unmodifiableList(results); + } finally { ctx.close(); } - } - catch (NamingException e) { + } catch (NameNotFoundException ignored) { + return List.of(); + } catch (NamingException e) { throw new RuntimeException(e); } - return Optional.empty(); } } diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java new file mode 100644 index 00000000000..6aafb20bee2 --- /dev/null +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java @@ -0,0 +1,39 @@ +package com.yahoo.config.provision; + +/** + * A class which can be asked if allocations should be exclusive. 
+ * + * @author bratseth + */ +public class Exclusivity { + + private final Zone zone; + private final SharedHosts sharedHost; + + public Exclusivity(Zone zone, SharedHosts sharedHost) { + this.zone = zone; + this.sharedHost = sharedHost; + } + + /** Returns whether nodes must be allocated to hosts that are exclusive to the cluster type. */ + public boolean clusterType(ClusterSpec cluster) { + return sharedHost.hasClusterType(cluster.type()); + } + + /** Returns whether the nodes of this cluster must be running on hosts that are specifically provisioned for the application. */ + public boolean provisioning(ClusterSpec clusterSpec) { + return !zone.cloud().allowHostSharing() && clusterSpec.isExclusive(); + } + + /** + * Returns whether nodes are allocated exclusively in this instance given this cluster spec. + * Exclusive allocation requires that the wanted node resources match the advertised resources of the node + * perfectly. + */ + public boolean allocation(ClusterSpec clusterSpec) { + return clusterSpec.isExclusive() || + ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) || + ( !zone.cloud().allowHostSharing() && !sharedHost.supportsClusterType(clusterSpec.type())); + } + +} diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java b/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java new file mode 100644 index 00000000000..3c3ff8d7877 --- /dev/null +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java @@ -0,0 +1,21 @@ +package com.yahoo.config.provision; + +/** + * @author hakonhall + */ +public interface SharedHosts { + + /** Whether there are any shared hosts specifically for the given cluster type, or without a cluster type restriction. */ + boolean supportsClusterType(ClusterSpec.Type clusterType); + + /** Whether there are any shared hosts specifically for the given cluster type. 
*/ + boolean hasClusterType(ClusterSpec.Type clusterType); + + static SharedHosts empty() { + return new SharedHosts() { + @Override public boolean supportsClusterType(ClusterSpec.Type clusterType) { return false; } + @Override public boolean hasClusterType(ClusterSpec.Type clusterType) { return false; } + }; + } + +} diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java b/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java index f73fec3ec68..094a7c5c003 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java @@ -25,6 +25,7 @@ public enum SystemName { /** Continuous deployment system for testing the Public system */ PublicCd(true, true), + PublicCdMigration(true, true), /** Local development system */ dev(false, false); @@ -48,6 +49,7 @@ public enum SystemName { case "main": return main; case "public": return Public; case "publiccd": return PublicCd; + case "publiccdmigration": return PublicCdMigration; default: throw new IllegalArgumentException(String.format("'%s' is not a valid system", value)); } } @@ -59,6 +61,7 @@ public enum SystemName { case main: return "main"; case Public: return "public"; case PublicCd: return "publiccd"; + case PublicCdMigration: return "publiccdmigration"; default : throw new IllegalStateException(); } } |