about | summary | refs | log | tree | commit | diff | stats
path: root/config-provisioning/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'config-provisioning/src/main')
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java 177
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java 108
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java 39
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java 21
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java 3
5 files changed, 297 insertions, 51 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java
new file mode 100644
index 00000000000..818a448187c
--- /dev/null
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/CapacityPolicies.java
@@ -0,0 +1,177 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.config.provision;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeResources.DiskSpeed;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+import static com.yahoo.config.provision.NodeResources.Architecture;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Defines the policies for assigning cluster capacity in various environments.
+ *
+ * @author bratseth
+ */
+public class CapacityPolicies {
+
+ private final Zone zone;
+ private final Exclusivity exclusivity;
+ private final ApplicationId applicationId;
+ private final Architecture adminClusterArchitecture;
+
+ public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Architecture adminClusterArchitecture) {
+ this.zone = zone;
+ this.exclusivity = exclusivity;
+ this.applicationId = applicationId;
+ this.adminClusterArchitecture = adminClusterArchitecture;
+ }
+
+ public Capacity applyOn(Capacity capacity, boolean exclusive) {
+ var min = applyOn(capacity.minResources(), capacity, exclusive);
+ var max = applyOn(capacity.maxResources(), capacity, exclusive);
+ var groupSize = capacity.groupSize().fromAtMost(max.nodes() / min.groups())
+ .toAtLeast(min.nodes() / max.groups());
+ return capacity.withLimits(min, max, groupSize);
+ }
+
+ private ClusterResources applyOn(ClusterResources resources, Capacity capacity, boolean exclusive) {
+ int nodes = decideCount(resources.nodes(), capacity.isRequired(), applicationId.instance().isTester());
+ int groups = decideGroups(resources.nodes(), resources.groups(), nodes);
+ var nodeResources = decideNodeResources(resources.nodeResources(), capacity.isRequired(), exclusive);
+ return new ClusterResources(nodes, groups, nodeResources);
+ }
+
+ private int decideCount(int requested, boolean required, boolean isTester) {
+ if (isTester) return 1;
+
+ if (required) return requested;
+ return switch (zone.environment()) {
+ case dev, test -> 1;
+ case perf -> Math.min(requested, 3);
+ case staging -> requested <= 1 ? requested : Math.max(2, requested / 10);
+ case prod -> requested;
+ };
+ }
+
+ private int decideGroups(int requestedNodes, int requestedGroups, int decidedNodes) {
+ if (requestedNodes == decidedNodes) return requestedGroups;
+ int groups = Math.min(requestedGroups, decidedNodes); // cannot have more groups than nodes
+ while (groups > 1 && decidedNodes % groups != 0)
+ groups--; // Must be divisible by the number of groups
+ return groups;
+ }
+
+ private NodeResources decideNodeResources(NodeResources target, boolean required, boolean exclusive) {
+ if (required || exclusive) return target; // Cannot downsize if resources are required, or exclusively allocated
+ if (target.isUnspecified()) return target; // Cannot be modified
+
+ if (zone.environment() == Environment.dev && zone.cloud().allowHostSharing()) {
+ // Dev does not cap the cpu or network of containers since usage is spotty: Allocate just a small amount exclusively
+ target = target.withVcpu(0.1).withBandwidthGbps(0.1);
+
+ // Allocate without GPU in dev
+ target = target.with(NodeResources.GpuResources.zero());
+ }
+
+ // Allow slow storage in zones which are not performance sensitive
+ if (zone.system().isCd() || zone.environment() == Environment.dev || zone.environment() == Environment.test)
+ target = target.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any).withBandwidthGbps(0.1);
+
+ return target;
+ }
+
+ public ClusterResources specifyFully(ClusterResources resources, ClusterSpec clusterSpec) {
+ return resources.with(specifyFully(resources.nodeResources(), clusterSpec));
+ }
+
+ public NodeResources specifyFully(NodeResources resources, ClusterSpec clusterSpec) {
+ return resources.withUnspecifiedFieldsFrom(defaultResources(clusterSpec).with(DiskSpeed.any));
+ }
+
+ private NodeResources defaultResources(ClusterSpec clusterSpec) {
+ if (clusterSpec.type() == ClusterSpec.Type.admin) {
+ if (exclusivity.allocation(clusterSpec)) {
+ return smallestExclusiveResources().with(adminClusterArchitecture);
+ }
+
+ if (clusterSpec.id().value().equals("cluster-controllers")) {
+ return clusterControllerResources(clusterSpec, adminClusterArchitecture).with(adminClusterArchitecture);
+ }
+
+ if (clusterSpec.id().value().equals("logserver")) {
+ return logserverResources(adminClusterArchitecture).with(adminClusterArchitecture);
+ }
+
+ return versioned(clusterSpec, Map.of(new Version(0), smallestSharedResources())).with(adminClusterArchitecture);
+ }
+
+ if (clusterSpec.type() == ClusterSpec.Type.content) {
+ // When changing defaults here update cloud.vespa.ai/en/reference/services
+ return zone.cloud().dynamicProvisioning()
+ ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2, 16, 300, 0.3)))
+ : versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 8, 50, 0.3)));
+ }
+ else {
+ // When changing defaults here update cloud.vespa.ai/en/reference/services
+ return zone.cloud().dynamicProvisioning()
+ ? versioned(clusterSpec, Map.of(new Version(0), new NodeResources(2.0, 8, 50, 0.3)))
+ : versioned(clusterSpec, Map.of(new Version(0), new NodeResources(1.5, 8, 50, 0.3)));
+ }
+ }
+
+ private NodeResources clusterControllerResources(ClusterSpec clusterSpec, Architecture architecture) {
+ // 1.32 fits floor(8/1.32) = 6 cluster controllers on each 8Gb host, and each will have
+ // 1.32-(0.7+0.6)*(1.32/8) = 1.1 Gb real memory given current taxes.
+ if (architecture == Architecture.x86_64)
+ return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(0.25, 1.32, 10, 0.3)));
+ else
+ // arm64 nodes need more memory
+ return versioned(clusterSpec, Map.of(new Version(0), new NodeResources(0.25, 1.50, 10, 0.3)));
+ }
+
+ private NodeResources logserverResources(Architecture architecture) {
+ if (zone.cloud().name() == CloudName.AZURE)
+ return new NodeResources(2, 4, 50, 0.3);
+
+ if (zone.cloud().name() == CloudName.GCP)
+ return new NodeResources(1, 4, 50, 0.3);
+
+ return architecture == Architecture.arm64
+ ? new NodeResources(0.5, 2.5, 50, 0.3)
+ : new NodeResources(0.5, 2, 50, 0.3);
+ }
+
+ // The lowest amount of resources that can be exclusively allocated (i.e. a matching host flavor for this exists)
+ private NodeResources smallestExclusiveResources() {
+ return zone.cloud().name() == CloudName.AZURE || zone.cloud().name() == CloudName.GCP
+ ? new NodeResources(2, 8, 50, 0.3)
+ : new NodeResources(0.5, 8, 50, 0.3);
+ }
+
+ // The lowest amount of resources that can be shared (i.e. a matching host flavor for this exists)
+ private NodeResources smallestSharedResources() {
+ return zone.cloud().name() == CloudName.GCP
+ ? new NodeResources(1, 4, 50, 0.3)
+ : new NodeResources(0.5, 2, 50, 0.3);
+ }
+
+ /** Returns the requested cluster with its exclusivity decided, i.e. whether its nodes must be kept off physical hosts shared with other applications */
+ public ClusterSpec decideExclusivity(Capacity capacity, ClusterSpec requestedCluster) {
+ if (capacity.cloudAccount().isPresent()) return requestedCluster.withExclusivity(true); // Implicit exclusive
+ boolean exclusive = requestedCluster.isExclusive() && (capacity.isRequired() || zone.environment() == Environment.prod);
+ return requestedCluster.withExclusivity(exclusive);
+ }
+
+ /**
+ * Returns the resources for the newest version not newer than that requested in the cluster spec.
+ */
+ private static NodeResources versioned(ClusterSpec spec, Map<Version, NodeResources> resources) {
+ return requireNonNull(new TreeMap<>(resources).floorEntry(spec.vespaVersion()),
+ "no default resources applicable for " + spec + " among: " + resources)
+ .getValue();
+ }
+
+}
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java
index a8674d220d1..d7325a5fe92 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/EndpointsChecker.java
@@ -4,16 +4,20 @@ package com.yahoo.config.provision;
import ai.vespa.http.DomainName;
import ai.vespa.http.HttpURL;
+import javax.naming.NameNotFoundException;
import javax.naming.NamingException;
import javax.naming.directory.Attribute;
import javax.naming.directory.Attributes;
+import javax.naming.directory.DirContext;
import javax.naming.directory.InitialDirContext;
import java.net.InetAddress;
-import java.net.UnknownHostException;
+import java.util.ArrayList;
import java.util.Collections;
-import java.util.Enumeration;
+import java.util.HashSet;
import java.util.List;
+import java.util.Objects;
import java.util.Optional;
+import java.util.Set;
/**
* @author jonmv
@@ -35,9 +39,7 @@ public interface EndpointsChecker {
public static final Availability ready = new Availability(Status.available, "Endpoints are ready.");
}
- interface HostNameResolver { Optional<InetAddress> resolve(DomainName hostName); }
-
- interface CNameResolver { Optional<DomainName> resolve(DomainName hostName); }
+ interface NameResolver { List<String> resolve(NameType nameType, DomainName name); }
interface HealthChecker { Availability healthy(Endpoint endpoint); }
@@ -46,55 +48,54 @@ public interface EndpointsChecker {
}
static EndpointsChecker of(HealthChecker healthChecker) {
- return zoneEndpoints -> endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveHostName, EndpointsChecker::resolveCname, healthChecker);
+ return zoneEndpoints -> endpointsAvailable(zoneEndpoints, EndpointsChecker::resolveAll, healthChecker);
}
- static EndpointsChecker mock(HostNameResolver hostNameResolver, CNameResolver cNameResolver, HealthChecker healthChecker) {
- return zoneEndpoints -> endpointsAvailable(zoneEndpoints, hostNameResolver, cNameResolver, healthChecker);
+ static EndpointsChecker mock(NameResolver resolver, HealthChecker healthChecker) {
+ return zoneEndpoints -> endpointsAvailable(zoneEndpoints, resolver, healthChecker);
}
Availability endpointsAvailable(List<Endpoint> zoneEndpoints);
private static Availability endpointsAvailable(List<Endpoint> zoneEndpoints,
- HostNameResolver hostNameResolver,
- CNameResolver cNameResolver,
+ NameResolver nameResolver,
HealthChecker healthChecker) {
if (zoneEndpoints.isEmpty())
return new Availability(Status.endpointsUnavailable, "Endpoints not yet ready.");
for (Endpoint endpoint : zoneEndpoints) {
- Optional<InetAddress> resolvedIpAddress = hostNameResolver.resolve(endpoint.url().domain());
- if (resolvedIpAddress.isEmpty())
+ Set<String> resolvedIpAddresses = resolveIpAddresses(endpoint.url().domain(), nameResolver);
+ if (resolvedIpAddresses.isEmpty())
return new Availability(Status.endpointsUnavailable, "DNS lookup yielded no IP address for '" + endpoint.url().domain() + "'.");
- if (resolvedIpAddress.equals(endpoint.ipAddress())) // We expect a certain IP address, and that's what we got, so we're good.
- continue;
-
- if (endpoint.ipAddress().isPresent()) // We expect a certain IP address, but that's not what we got.
+ if (endpoint.ipAddress().isPresent()) {
+ if (resolvedIpAddresses.contains(endpoint.ipAddress().get().getHostAddress())) {
+ continue; // Resolved addresses contain the expected endpoint IP address
+ }
return new Availability(Status.endpointsUnavailable,
- "IP address of '" + endpoint.url().domain() + "' (" +
- resolvedIpAddress.get().getHostAddress() + ") and load balancer " +
- "' (" + endpoint.ipAddress().get().getHostAddress() + ") are not equal");
+ "IP address(es) of '" + endpoint.url().domain() + "' (" +
+ resolvedIpAddresses + ") do not include load balancer IP " +
+ "' (" + endpoint.ipAddress().get().getHostAddress() + ")");
+ }
if (endpoint.canonicalName().isEmpty()) // We have no expected IP address, and no canonical name, so there's nothing more to check.
continue;
- Optional<DomainName> cNameValue = cNameResolver.resolve(endpoint.url().domain());
- if (cNameValue.filter(endpoint.canonicalName().get()::equals).isEmpty()) {
+ List<String> cnameAnswers = nameResolver.resolve(NameType.CNAME, endpoint.url().domain());
+ if (!cnameAnswers.contains(endpoint.canonicalName().get().value())) {
return new Availability(Status.endpointsUnavailable,
"CNAME '" + endpoint.url().domain() + "' points at " +
- cNameValue.map(name -> "'" + name + "'").orElse("nothing") +
+ cnameAnswers +
" but should point at load balancer " +
endpoint.canonicalName().map(name -> "'" + name + "'").orElse("nothing"));
}
- Optional<InetAddress> loadBalancerAddress = hostNameResolver.resolve(endpoint.canonicalName().get());
- if ( ! loadBalancerAddress.equals(resolvedIpAddress)) {
+ Set<String> loadBalancerAddresses = resolveIpAddresses(endpoint.canonicalName().get(), nameResolver);
+ if ( ! loadBalancerAddresses.equals(resolvedIpAddresses)) {
return new Availability(Status.endpointsUnavailable,
- "IP address of CNAME '" + endpoint.url().domain() + "' (" +
- resolvedIpAddress.get().getHostAddress() + ") and load balancer '" +
- endpoint.canonicalName().get() + "' (" +
- loadBalancerAddress.map(InetAddress::getHostAddress).orElse("empty") + ") are not equal");
+ "IP address(es) of CNAME '" + endpoint.url().domain() + "' (" +
+ resolvedIpAddresses + ") and load balancer '" +
+ endpoint.canonicalName().get() + "' (" + loadBalancerAddresses + ") are not equal");
}
}
@@ -107,38 +108,43 @@ public interface EndpointsChecker {
return availability;
}
- /** Returns the IP address of the given host name, if any. */
- private static Optional<InetAddress> resolveHostName(DomainName hostname) {
- try {
- return Optional.of(InetAddress.getByName(hostname.value()));
- }
- catch (UnknownHostException ignored) {
- return Optional.empty();
- }
+ private static Set<String> resolveIpAddresses(DomainName name, NameResolver nameResolver) {
+ Set<String> answers = new HashSet<>();
+ answers.addAll(nameResolver.resolve(NameType.A, name));
+ answers.addAll(nameResolver.resolve(NameType.AAAA, name));
+ return answers;
+ }
+
+ enum NameType {
+ A, AAAA, CNAME
}
- /** Returns the host name of the given CNAME, if any. */
- private static Optional<DomainName> resolveCname(DomainName endpoint) {
+ /** Returns all answers for the given type and name. An empty list is returned if the name does not exist (NXDOMAIN) or has no record of the given type */
+ private static List<String> resolveAll(NameType type, DomainName name) {
try {
- InitialDirContext ctx = new InitialDirContext();
+ DirContext ctx = new InitialDirContext();
try {
- Attributes attrs = ctx.getAttributes("dns:/" + endpoint.value(), new String[]{ "CNAME" });
- for (Attribute attribute : Collections.list(attrs.getAll())) {
- Enumeration<?> vals = attribute.getAll();
- if (vals.hasMoreElements()) {
- String hostname = vals.nextElement().toString();
- return Optional.of(hostname.substring(0, hostname.length() - 1)).map(DomainName::of);
- }
+ String entryType = type.name();
+ Attributes attributes = ctx.getAttributes("dns:/" + name, new String[]{entryType});
+ Attribute attribute = attributes.get(entryType);
+ if (attribute == null) {
+ return List.of();
}
- }
- finally {
+ List<String> results = new ArrayList<>();
+ attribute.getAll().asIterator().forEachRemaining(value -> {
+ String answer = Objects.toString(value);
+ answer = answer.endsWith(".") ? answer.substring(0, answer.length() - 1) : answer; // Trim trailing dot
+ results.add(answer);
+ });
+ return Collections.unmodifiableList(results);
+ } finally {
ctx.close();
}
- }
- catch (NamingException e) {
+ } catch (NameNotFoundException ignored) {
+ return List.of();
+ } catch (NamingException e) {
throw new RuntimeException(e);
}
- return Optional.empty();
}
}
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java
new file mode 100644
index 00000000000..6aafb20bee2
--- /dev/null
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Exclusivity.java
@@ -0,0 +1,39 @@
+package com.yahoo.config.provision;
+
+/**
+ * A class which can be asked if allocations should be exclusive.
+ *
+ * @author bratseth
+ */
+public class Exclusivity {
+
+ private final Zone zone;
+ private final SharedHosts sharedHost;
+
+ public Exclusivity(Zone zone, SharedHosts sharedHost) {
+ this.zone = zone;
+ this.sharedHost = sharedHost;
+ }
+
+ /** Returns whether nodes must be allocated to hosts that are exclusive to the cluster type. */
+ public boolean clusterType(ClusterSpec cluster) {
+ return sharedHost.hasClusterType(cluster.type());
+ }
+
+ /** Returns whether the nodes of this cluster must be running on hosts that are specifically provisioned for the application. */
+ public boolean provisioning(ClusterSpec clusterSpec) {
+ return !zone.cloud().allowHostSharing() && clusterSpec.isExclusive();
+ }
+
+ /**
+ * Returns whether nodes are allocated exclusively in this instance given this cluster spec.
+ * Exclusive allocation requires that the wanted node resources match the advertised resources of the node
+ * perfectly.
+ */
+ public boolean allocation(ClusterSpec clusterSpec) {
+ return clusterSpec.isExclusive() ||
+ ( clusterSpec.type().isContainer() && zone.system().isPublic() && !zone.environment().isTest() ) ||
+ ( !zone.cloud().allowHostSharing() && !sharedHost.supportsClusterType(clusterSpec.type()));
+ }
+
+}
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java b/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java
new file mode 100644
index 00000000000..3c3ff8d7877
--- /dev/null
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/SharedHosts.java
@@ -0,0 +1,21 @@
+package com.yahoo.config.provision;
+
+/**
+ * @author hakonhall
+ */
+public interface SharedHosts {
+
+ /** Whether there are any shared hosts specifically for the given cluster type, or without a cluster type restriction. */
+ boolean supportsClusterType(ClusterSpec.Type clusterType);
+
+ /** Whether there are any shared hosts specifically for the given cluster type. */
+ boolean hasClusterType(ClusterSpec.Type clusterType);
+
+ static SharedHosts empty() {
+ return new SharedHosts() {
+ @Override public boolean supportsClusterType(ClusterSpec.Type clusterType) { return false; }
+ @Override public boolean hasClusterType(ClusterSpec.Type clusterType) { return false; }
+ };
+ }
+
+}
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java b/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java
index f73fec3ec68..094a7c5c003 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/SystemName.java
@@ -25,6 +25,7 @@ public enum SystemName {
/** Continuous deployment system for testing the Public system */
PublicCd(true, true),
+ PublicCdMigration(true, true),
/** Local development system */
dev(false, false);
@@ -48,6 +49,7 @@ public enum SystemName {
case "main": return main;
case "public": return Public;
case "publiccd": return PublicCd;
+ case "publiccdmigration": return PublicCdMigration;
default: throw new IllegalArgumentException(String.format("'%s' is not a valid system", value));
}
}
@@ -59,6 +61,7 @@ public enum SystemName {
case main: return "main";
case Public: return "public";
case PublicCd: return "publiccd";
+ case PublicCdMigration: return "publiccdmigration";
default : throw new IllegalStateException();
}
}