summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@verizonmedia.com> 2020-11-27 08:17:28 +0100
committer GitHub <noreply@github.com> 2020-11-27 08:17:28 +0100
commit 6bd0b69876d001004512ccf5d3f9f9edfbe3d3f8 (patch)
tree a28f961b88561b108e15539d60d40828f2071cff
parent 2e8b2724c9f3729dc26f71568c0d4873c8617755 (diff)
parent abe161b509cde53da37d87faa44981a5208e7b00 (diff)
Merge pull request #15488 from vespa-engine/hakonhall/allow-preprovision-capacity-on-partially-filled-hosts
Allow preprovision capacity on partially filled hosts
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java 2
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/Flags.java 18
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java 89
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/custom/HostCapacity.java 73
-rw-r--r-- flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java 41
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java 181
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java 18
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java 26
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java 288
12 files changed, 577 insertions, 173 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
index 71776a7641d..7d2a96ce991 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
@@ -101,7 +101,7 @@ public final class ClusterSpec {
private boolean exclusive = false;
private Optional<Id> combinedId = Optional.empty();
- Builder(Type type, Id id, boolean specification) {
+ private Builder(Type type, Id id, boolean specification) {
this.type = type;
this.id = id;
this.specification = specification;
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 0ecf957d1d9..f087714896b 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -3,7 +3,7 @@ package com.yahoo.vespa.flags;
import com.yahoo.component.Vtag;
import com.yahoo.vespa.defaults.Defaults;
-import com.yahoo.vespa.flags.custom.HostCapacity;
+import com.yahoo.vespa.flags.custom.ClusterCapacity;
import com.yahoo.vespa.flags.custom.SharedHost;
import java.util.List;
@@ -83,11 +83,17 @@ public class Flags {
"Takes effect on the next run of RetiredExpirer.",
HOSTNAME);
- public static final UnboundListFlag<HostCapacity> TARGET_CAPACITY = defineListFlag(
- "preprovision-capacity", List.of(), HostCapacity.class,
- "List of node resources and their count that should be provisioned." +
- "In a dynamically provisioned zone this specifies the unallocated (i.e. pre-provisioned) capacity. " +
- "Otherwise it specifies the total (unallocated or not) capacity.",
+ public static final UnboundListFlag<ClusterCapacity> PREPROVISION_CAPACITY = defineListFlag(
+ "preprovision-capacity", List.of(), ClusterCapacity.class,
+ "Specifies the resources that ought to be immediately available for additional cluster " +
+ "allocations. If the resources are not available, additional hosts will be provisioned. " +
+ "Only applies to dynamically provisioned zones.",
+ "Takes effect on next iteration of DynamicProvisioningMaintainer.");
+
+ public static final UnboundBooleanFlag COMPACT_PREPROVISION_CAPACITY = defineFeatureFlag(
+ "compact-preprovision-capacity", true,
+ "Whether preprovision capacity can be satisfied with available capacity on hosts with " +
+ "existing allocations. Historically preprovision-capacity referred to empty hosts.",
"Takes effect on next iteration of DynamicProvisioningMaintainer.");
public static final UnboundJacksonFlag<SharedHost> SHARED_HOST = defineJacksonFlag(
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java
new file mode 100644
index 00000000000..9992d1f9a53
--- /dev/null
+++ b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java
@@ -0,0 +1,89 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.flags.custom;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Objects;
+import java.util.OptionalDouble;
+
+/**
+ * Immutable (all fields final) node count plus per-node resources; element type of the {@code preprovision-capacity} flag.
+ * @author freva
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(value = JsonInclude.Include.NON_NULL)
+public class ClusterCapacity {
+    private final int count;
+    private final double vcpu;
+    private final double memoryGb;
+    private final double diskGb;
+    private final OptionalDouble bandwidthGbps; // empty when not specified; bandwidthGbps() supplies the fallback
+    /** Rejects a negative count, vcpu, memoryGb or diskGb; a null bandwidthGbps means "not specified". */
+    @JsonCreator
+    public ClusterCapacity(@JsonProperty("count") int count,
+                           @JsonProperty("vcpu") double vcpu,
+                           @JsonProperty("memoryGb") double memoryGb,
+                           @JsonProperty("diskGb") double diskGb,
+                           @JsonProperty("bandwidthGbps") Double bandwidthGbps) {
+        this.count = (int) requireNonNegative("count", count);
+        this.vcpu = requireNonNegative("vcpu", vcpu);
+        this.memoryGb = requireNonNegative("memoryGb", memoryGb);
+        this.diskGb = requireNonNegative("diskGb", diskGb);
+        this.bandwidthGbps = bandwidthGbps == null ? OptionalDouble.empty() : OptionalDouble.of(bandwidthGbps);
+    }
+
+    /** Returns a new ClusterCapacity equal to {@code this}, but with the given count. */
+    public ClusterCapacity withCount(int count) {
+        return new ClusterCapacity(count, vcpu, memoryGb, diskGb, bandwidthGbpsOrNull());
+    }
+
+    @JsonGetter("count") public int count() { return count; }
+    @JsonGetter("vcpu") public double vcpu() { return vcpu; }
+    @JsonGetter("memoryGb") public double memoryGb() { return memoryGb; }
+    @JsonGetter("diskGb") public double diskGb() { return diskGb; }
+    @JsonGetter("bandwidthGbps") public Double bandwidthGbpsOrNull() { // a null is left out of the JSON (NON_NULL)
+        return bandwidthGbps.isPresent() ? bandwidthGbps.getAsDouble() : null;
+    }
+    /** Returns the bandwidth, or 1.0 if none was specified. */
+    @JsonIgnore
+    public double bandwidthGbps() { return bandwidthGbps.orElse(1.0); }
+
+    @Override
+    public String toString() {
+        return "ClusterCapacity{" +
+               "count=" + count +
+               ", vcpu=" + vcpu +
+               ", memoryGb=" + memoryGb +
+               ", diskGb=" + diskGb +
+               ", bandwidthGbps=" + bandwidthGbps +
+               '}';
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ClusterCapacity that = (ClusterCapacity) o;
+        return count == that.count &&
+               Double.compare(that.vcpu, vcpu) == 0 &&
+               Double.compare(that.memoryGb, memoryGb) == 0 &&
+               Double.compare(that.diskGb, diskGb) == 0 &&
+               bandwidthGbps.equals(that.bandwidthGbps);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(count, vcpu, memoryGb, diskGb, bandwidthGbps);
+    }
+    /** Returns {@code value} unchanged; zero is accepted, anything below zero throws IllegalArgumentException. */
+    private static double requireNonNegative(String name, double value) {
+        if (value < 0)
+            throw new IllegalArgumentException("'" + name + "' must be non-negative, was " + value);
+        return value;
+    }
+}
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/custom/HostCapacity.java b/flags/src/main/java/com/yahoo/vespa/flags/custom/HostCapacity.java
deleted file mode 100644
index 947520ca2d7..00000000000
--- a/flags/src/main/java/com/yahoo/vespa/flags/custom/HostCapacity.java
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.flags.custom;
-
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.Objects;
-
-/**
- * @author freva
- */
-@JsonIgnoreProperties(ignoreUnknown = true)
-public class HostCapacity {
- @JsonProperty("vcpu")
- private final double vcpu;
-
- @JsonProperty("memoryGb")
- private final double memoryGb;
-
- @JsonProperty("diskGb")
- private final double diskGb;
-
- @JsonProperty("count")
- private final int count;
-
- public HostCapacity(@JsonProperty("vcpu") double vcpu,
- @JsonProperty("memoryGb") double memoryGb,
- @JsonProperty("diskGb") double diskGb,
- @JsonProperty("count") int count) {
- this.vcpu = requirePositive("vcpu", vcpu);
- this.memoryGb = requirePositive("memoryGb", memoryGb);
- this.diskGb = requirePositive("diskGb", diskGb);
- this.count = (int) requirePositive("count", count);
- }
-
- public double getVcpu() {
- return vcpu;
- }
-
- public double getMemoryGb() {
- return memoryGb;
- }
-
- public double getDiskGb() {
- return diskGb;
- }
-
- public int getCount() {
- return count;
- }
-
- private static double requirePositive(String name, double value) {
- if (value <= 0)
- throw new IllegalArgumentException("'" + name + "' must be positive, was " + value);
- return value;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
- HostCapacity that = (HostCapacity) o;
- return Double.compare(that.vcpu, vcpu) == 0 &&
- Double.compare(that.memoryGb, memoryGb) == 0 &&
- Double.compare(that.diskGb, diskGb) == 0 &&
- count == that.count;
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(vcpu, memoryGb, diskGb, count);
- }
-}
diff --git a/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java
new file mode 100644
index 00000000000..0258b562897
--- /dev/null
+++ b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java
@@ -0,0 +1,41 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.flags.custom;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class ClusterCapacityTest { // JSON round-trip tests for ClusterCapacity, using a plain Jackson ObjectMapper
+ @Test
+ public void serialization() throws IOException {
+ ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, null);
+ ObjectMapper mapper = new ObjectMapper();
+ String json = mapper.writeValueAsString(clusterCapacity);
+ assertEquals("{\"count\":7,\"vcpu\":1.2,\"memoryGb\":3.4,\"diskGb\":5.6}", json);
+ // Reading the JSON back must give the same values; the absent bandwidthGbps reads as the 1.0 default
+ ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class);
+ assertEquals(1.2, deserialized.vcpu(), 0.0001);
+ assertEquals(3.4, deserialized.memoryGb(), 0.0001);
+ assertEquals(5.6, deserialized.diskGb(), 0.0001);
+ assertEquals(1.0, deserialized.bandwidthGbps(), 0.0001);
+ assertEquals(7, deserialized.count());
+ }
+
+ @Test
+ public void serialization2() throws IOException {
+ ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, 2.3);
+ ObjectMapper mapper = new ObjectMapper();
+ String json = mapper.writeValueAsString(clusterCapacity);
+ assertEquals("{\"count\":7,\"vcpu\":1.2,\"memoryGb\":3.4,\"diskGb\":5.6,\"bandwidthGbps\":2.3}", json);
+ // Reading the JSON back must give the same values, including the explicitly set bandwidthGbps
+ ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class);
+ assertEquals(1.2, deserialized.vcpu(), 0.0001);
+ assertEquals(3.4, deserialized.memoryGb(), 0.0001);
+ assertEquals(5.6, deserialized.diskGb(), 0.0001);
+ assertEquals(2.3, deserialized.bandwidthGbps(), 0.0001);
+ assertEquals(7, deserialized.count());
+ }
+} \ No newline at end of file
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 86795767710..70d9215aa8f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -884,11 +884,15 @@ public class NodeRepository extends AbstractComponent {
}
public boolean canAllocateTenantNodeTo(Node host) {
+ return canAllocateTenantNodeTo(host, zone.getCloud().dynamicProvisioning());
+ }
+
+ public static boolean canAllocateTenantNodeTo(Node host, boolean dynamicProvisioning) {
if ( ! host.type().canRun(NodeType.tenant)) return false;
if (host.status().wantToRetire()) return false;
if (host.allocation().map(alloc -> alloc.membership().retired()).orElse(false)) return false;
- if (zone.getCloud().dynamicProvisioning())
+ if (dynamicProvisioning)
return EnumSet.of(State.active, State.ready, State.provisioned).contains(host.state());
else
return host.state() == State.active;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 0195466b689..22d1e0333fb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -2,16 +2,22 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.component.Version;
+import com.yahoo.component.Vtag;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterMembership;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.OutOfCapacityException;
import com.yahoo.jdisc.Metric;
+import com.yahoo.lang.MutableInteger;
import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.ListFlag;
-import com.yahoo.vespa.flags.custom.HostCapacity;
+import com.yahoo.vespa.flags.custom.ClusterCapacity;
+import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -20,7 +26,10 @@ import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate;
+import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer;
import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
import com.yahoo.yolean.Exceptions;
@@ -32,6 +41,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.logging.Level;
@@ -48,7 +58,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
private static final Logger log = Logger.getLogger(DynamicProvisioningMaintainer.class.getName());
private final HostProvisioner hostProvisioner;
- private final ListFlag<HostCapacity> targetCapacityFlag;
+ private final ListFlag<ClusterCapacity> preprovisionCapacityFlag;
+ private final BooleanFlag compactPreprovisionCapacityFlag;
DynamicProvisioningMaintainer(NodeRepository nodeRepository,
Duration interval,
@@ -57,7 +68,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
Metric metric) {
super(nodeRepository, interval, metric);
this.hostProvisioner = hostProvisioner;
- this.targetCapacityFlag = Flags.TARGET_CAPACITY.bindTo(flagSource);
+ this.preprovisionCapacityFlag = Flags.PREPROVISION_CAPACITY.bindTo(flagSource);
+ this.compactPreprovisionCapacityFlag = Flags.COMPACT_PREPROVISION_CAPACITY.bindTo(flagSource);
}
@Override
@@ -104,8 +116,17 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
/** Converge zone to wanted capacity */
private void convergeToCapacity(NodeList nodes) {
- List<NodeResources> capacity = targetCapacity();
- List<Node> excessHosts = provision(capacity, nodes);
+ List<Node> excessHosts;
+ try {
+ excessHosts = provision(nodes);
+ } catch (OutOfCapacityException | IllegalStateException e) {
+ log.log(Level.WARNING, "Failed to provision preprovision capacity and/or find excess hosts: " + e.getMessage());
+ return; // avoid removing excess hosts
+ } catch (RuntimeException e) {
+ log.log(Level.WARNING, "Failed to provision preprovision capacity and/or find excess hosts", e);
+ return; // avoid removing excess hosts
+ }
+
excessHosts.forEach(host -> {
try {
hostProvisioner.deprovision(host);
@@ -119,13 +140,21 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
/**
* Provision hosts to ensure there is room to allocate spare nodes.
*
- * @param advertisedSpareCapacity the advertised resources of the spare nodes
* @param nodes list of all nodes
* @return excess hosts that can safely be deprovisioned: An excess host 1. contains no nodes allocated
* to an application, and assuming the spare nodes have been allocated, and 2. is not parked
* without wantToDeprovision (which means an operator is looking at the node).
*/
- private List<Node> provision(List<NodeResources> advertisedSpareCapacity, NodeList nodes) {
+    private List<Node> provision(NodeList nodes) {
+        // The flag is read on every call, so a flag change applies from the next maintainer run.
+        // With the flag off, the legacy strategy is used instead.
+        if ( ! compactPreprovisionCapacityFlag.value()) {
+            return legacyProvision(nodes);
+        }
+        return findExcessHosts(nodes);
+    }
+
+ private List<Node> legacyProvision(NodeList nodes) {
Map<String, Node> hostsByHostname = new HashMap<>(nodes.hosts().asList().stream()
.filter(host -> host.state() != Node.State.parked || host.status().wantToDeprovision())
.collect(Collectors.toMap(Node::hostname, Function.identity())));
@@ -138,7 +167,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
List<Node> excessHosts = new ArrayList<>(hostsByHostname.values());
- var capacity = new ArrayList<>(advertisedSpareCapacity);
+ var capacity = new ArrayList<>(targetCapacity());
for (Iterator<NodeResources> it = capacity.iterator(); it.hasNext() && !excessHosts.isEmpty(); ) {
NodeResources resources = it.next();
excessHosts.stream()
@@ -174,17 +203,137 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
return excessHosts;
}
+ private List<Node> findExcessHosts(NodeList nodeList) { // provisions for any deficit, then returns the hosts left without allocated children
+ final List<Node> nodes = provisionUntilNoDeficit(nodeList);
+ // Start from every host, skipping parked hosts that are not marked wantToDeprovision
+ Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream()
+ .filter(node -> node.type() == NodeType.host)
+ .filter(host -> host.state() != Node.State.parked || host.status().wantToDeprovision())
+ .collect(Collectors.toMap(Node::hostname, Function.identity())));
+ // Drop each host that is the parent of a node with an allocation, simulated preprovision allocations included
+ nodes.stream()
+ .filter(node -> node.allocation().isPresent())
+ .flatMap(node -> node.parentHostname().stream())
+ .distinct()
+ .forEach(hostsByHostname::remove);
+ // The hosts still in the map have no allocated children: these are the excess hosts
+ return List.copyOf(hostsByHostname.values());
+ }
+
+ /**
+ * Provisions hosts, one deficit at a time, until all preprovision capacity can be (virtually) allocated.
+ * @return the nodes in {@code nodeList}, plus all hosts provisioned, plus all preprovision capacity nodes allocated
+ * @throws OutOfCapacityException if there were problems provisioning hosts; the message alone is meant
+ * to be sufficient, so it can be logged without a stack trace
+ * @throws IllegalStateException if there was an algorithmic problem (a deficit remains after the maximum
+ * number of provisioning rounds); the message alone is meant to be sufficient, without a stack trace
+ */
+ private List<Node> provisionUntilNoDeficit(NodeList nodeList) {
+ List<ClusterCapacity> preprovisionCapacity = preprovisionCapacityFlag.value();
+
+ // Worst-case each ClusterCapacity in preprovisionCapacity will require an allocation.
+ int maxProvisions = preprovisionCapacity.size();
+ // Each round repeats the virtual allocation from scratch on a fresh copy that includes the hosts provisioned so far
+ var nodesPlusProvisioned = new ArrayList<>(nodeList.asList());
+ for (int numProvisions = 0;; ++numProvisions) {
+ var nodesPlusProvisionedPlusAllocated = new ArrayList<>(nodesPlusProvisioned);
+ Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated);
+ if (deficit.isEmpty()) {
+ return nodesPlusProvisionedPlusAllocated;
+ }
+
+ if (numProvisions >= maxProvisions) {
+ throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting");
+ }
+ // Provision hosts for the reported deficit, add them to the node repository, and go another round
+ try {
+ Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
+ List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(deficit.get().count());
+ List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, toNodeResources(deficit.get()),
+ ApplicationId.defaultId(), osVersion, HostSharing.shared)
+ .stream()
+ .map(ProvisionedHost::generateHost)
+ .collect(Collectors.toList());
+ nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer);
+ nodesPlusProvisioned.addAll(hosts);
+ } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) {
+ throw new OutOfCapacityException("Failed to pre-provision " + deficit.get() + ": " + e.getMessage());
+ } catch (RuntimeException e) {
+ throw new RuntimeException("Failed to pre-provision " + deficit.get() + ", will retry in " + interval(), e);
+ }
+ }
+ }
+
+ /**
+ * Tries to allocate each preprovision cluster capacity in list order, stopping at the first one with a deficit.
+ * @param preprovisionCapacity the cluster capacities to allocate; each element is treated as its own cluster
+ * @param mutableNodes represents all nodes in the node repo. As preprovision capacity is virtually allocated,
+ * the allocated nodes are added to {@code mutableNodes}
+ * @return the part of a cluster capacity it was unable to allocate, if any
+ */
+ private Optional<ClusterCapacity> allocatePreprovisionCapacity(List<ClusterCapacity> preprovisionCapacity,
+ ArrayList<Node> mutableNodes) {
+ for (int clusterIndex = 0; clusterIndex < preprovisionCapacity.size(); ++clusterIndex) {
+ ClusterCapacity clusterCapacity = preprovisionCapacity.get(clusterIndex);
+ LockedNodeList nodeList = new LockedNodeList(mutableNodes, () -> {}); // NOTE(review): the no-op lambda presumably stands in for a lock — confirm
+ List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, nodeList);
+ int deficit = Math.max(0, clusterCapacity.count() - candidates.size());
+ if (deficit > 0) {
+ return Optional.of(clusterCapacity.withCount(deficit));
+ }
+ // Reached only when the full count was found; on a deficit we returned above without touching mutableNodes
+ // Simulate allocating the cluster
+ mutableNodes.addAll(candidates);
+ }
+ // No cluster capacity had a deficit
+ return Optional.empty();
+ }
+
+ private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList nodeList) {
+ NodeResources nodeResources = toNodeResources(clusterCapacity);
+ // Returns at most count() of the prioritizer's candidates, each converted to a node allocated to the dummy cluster
+ // We'll allocate each ClusterCapacity as a unique cluster in a dummy application
+ ApplicationId applicationId = ApplicationId.defaultId();
+ ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex));
+ ClusterSpec clusterSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId)
+ // build() requires a version, even though it is not (should not be) used
+ .vespaVersion(Vtag.currentVersion)
+ .build();
+ NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), nodeResources, false, true); // NOTE(review): meaning of the two booleans is not visible here — confirm against NodeSpec.from
+ int wantedGroups = 1;
+ // The literal 'true' below is the prioritizer's dynamicProvisioning argument
+ NodePrioritizer prioritizer = new NodePrioritizer(nodeList, applicationId, clusterSpec, nodeSpec, wantedGroups,
+ true, nodeRepository().nameResolver(), nodeRepository().resourcesCalculator(),
+ nodeRepository().spareCount());
+ List<NodeCandidate> nodeCandidates = prioritizer.collect(List.of());
+ MutableInteger index = new MutableInteger(0);
+ return nodeCandidates
+ .stream()
+ .limit(clusterCapacity.count())
+ .map(candidate -> candidate.toNode()
+ .allocate(applicationId,
+ ClusterMembership.from(clusterSpec, index.next()),
+ nodeResources,
+ nodeRepository().clock().instant()))
+ .collect(Collectors.toList());
+
+ }
+
+    private static NodeResources toNodeResources(ClusterCapacity clusterCapacity) {
+        ClusterCapacity c = clusterCapacity; // short alias; bandwidthGbps() yields 1.0 when no bandwidth was given
+        return new NodeResources(c.vcpu(), c.memoryGb(), c.diskGb(), c.bandwidthGbps());
+    }
/** Reads node resources declared by target capacity flag */
private List<NodeResources> targetCapacity() {
- return targetCapacityFlag.value().stream()
- .flatMap(cap -> {
- NodeResources resources = new NodeResources(cap.getVcpu(), cap.getMemoryGb(),
- cap.getDiskGb(), 1);
- return IntStream.range(0, cap.getCount()).mapToObj(i -> resources);
- })
- .sorted(NodeResourceComparator.memoryDiskCpuOrder().reversed())
- .collect(Collectors.toList());
+ return preprovisionCapacityFlag.value().stream()
+ .flatMap(cap -> {
+ NodeResources resources = new NodeResources(cap.vcpu(), cap.memoryGb(),
+ cap.diskGb(), cap.bandwidthGbps());
+ return IntStream.range(0, cap.count()).mapToObj(i -> resources);
+ })
+ .sorted(NodeResourceComparator.memoryDiskCpuOrder().reversed())
+ .collect(Collectors.toList());
}
/** Verify DNS configuration of given nodes */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java
index e04c1aa208d..54530297baa 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java
@@ -55,9 +55,9 @@ public class FlavorConfigBuilder {
else if (flavorName.equals("host2"))
flavorConfigBuilder.addFlavor(flavorName, 16, 24, 100, 1, Flavor.Type.BARE_METAL);
else if (flavorName.equals("host3"))
- flavorConfigBuilder.addFlavor(flavorName, 24, 64, 100, 1, Flavor.Type.BARE_METAL);
+ flavorConfigBuilder.addFlavor(flavorName, 24, 64, 100, 10, Flavor.Type.BARE_METAL);
else if (flavorName.equals("host4"))
- flavorConfigBuilder.addFlavor(flavorName, 48, 128, 1000, 1, Flavor.Type.BARE_METAL);
+ flavorConfigBuilder.addFlavor(flavorName, 48, 128, 1000, 10, Flavor.Type.BARE_METAL);
else if (flavorName.equals("devhost"))
flavorConfigBuilder.addFlavor(flavorName, 4., 80., 100, 10, Flavor.Type.BARE_METAL);
else
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 6462fb6f19d..f02659aab5f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -122,7 +122,9 @@ public class GroupPreparer {
NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requestedNodes,
highestIndex, nodeRepository);
NodePrioritizer prioritizer = new NodePrioritizer(
- allNodes, application, cluster, requestedNodes, wantedGroups, nodeRepository);
+ allNodes, application, cluster, requestedNodes, wantedGroups,
+ nodeRepository.zone().getCloud().dynamicProvisioning(), nodeRepository.nameResolver(),
+ nodeRepository.resourcesCalculator(), nodeRepository.spareCount());
allocation.offer(prioritizer.collect(surplusActiveNodes));
return allocation;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
index 14937e6afeb..460b7a821e6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
@@ -8,10 +8,10 @@ import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
-import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.IP;
+import com.yahoo.vespa.hosted.provision.persistence.NameResolver;
import com.yahoo.yolean.Exceptions;
import java.time.Instant;
@@ -25,7 +25,7 @@ import java.util.logging.Logger;
*
* @author smorgrav
*/
-abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
+public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
private static final Logger log = Logger.getLogger(NodeCandidate.class.getName());
@@ -224,8 +224,8 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
Node parent,
boolean violatesSpares,
LockedNodeList allNodes,
- NodeRepository nodeRepository) {
- return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, nodeRepository);
+ NameResolver nameResolver) {
+ return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, nameResolver);
}
public static NodeCandidate createNewExclusiveChild(Node node, Node parent) {
@@ -316,7 +316,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
/** Needed to construct the node */
private final LockedNodeList allNodes;
- private final NodeRepository nodeRepository;
+ private final NameResolver nameResolver;
private VirtualNodeCandidate(NodeResources resources,
NodeResources freeParentCapacity,
@@ -324,11 +324,11 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
boolean violatesSpares,
boolean exclusiveSwitch,
LockedNodeList allNodes,
- NodeRepository nodeRepository) {
+ NameResolver nameResolver) {
super(freeParentCapacity, Optional.of(parent), violatesSpares, exclusiveSwitch, false, true, false);
this.resources = resources;
this.allNodes = allNodes;
- this.nodeRepository = nodeRepository;
+ this.nameResolver = nameResolver;
}
@Override
@@ -361,7 +361,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
public NodeCandidate withNode() {
Optional<IP.Allocation> allocation;
try {
- allocation = parent.get().ipConfig().pool().findAllocation(allNodes, nodeRepository.nameResolver());
+ allocation = parent.get().ipConfig().pool().findAllocation(allNodes, nameResolver);
if (allocation.isEmpty()) return new InvalidNodeCandidate(resources, freeParentCapacity, parent.get(),
"No addresses available on parent host");
} catch (Exception e) {
@@ -382,7 +382,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> {
@Override
public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) {
- return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, nodeRepository);
+ return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, nameResolver);
}
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
index abfd5e021c4..b88556fbfec 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
@@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.persistence.NameResolver;
import java.util.ArrayList;
import java.util.Collections;
@@ -34,7 +35,8 @@ public class NodePrioritizer {
private final NodeSpec requestedNodes;
private final ApplicationId application;
private final ClusterSpec clusterSpec;
- private final NodeRepository nodeRepository;
+ private final NameResolver nameResolver;
+ private final boolean dynamicProvisioning;
/** Whether node specification allows new nodes to be allocated. */
private final boolean canAllocateNew;
private final boolean canAllocateToSpareHosts;
@@ -42,19 +44,19 @@ public class NodePrioritizer {
private final int currentClusterSize;
private final Set<Node> spareHosts;
- NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec,
- int wantedGroups, NodeRepository nodeRepository) {
- boolean dynamicProvisioning = nodeRepository.zone().getCloud().dynamicProvisioning();
-
+ public NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec,
+ int wantedGroups, boolean dynamicProvisioning, NameResolver nameResolver,
+ HostResourcesCalculator hostResourcesCalculator, int spareCount) {
this.allNodes = allNodes;
- this.capacity = new HostCapacity(allNodes, nodeRepository.resourcesCalculator());
+ this.capacity = new HostCapacity(allNodes, hostResourcesCalculator);
this.requestedNodes = nodeSpec;
this.clusterSpec = clusterSpec;
this.application = application;
+ this.dynamicProvisioning = dynamicProvisioning;
this.spareHosts = dynamicProvisioning ?
capacity.findSpareHostsInDynamicallyProvisionedZones(allNodes.asList()) :
- capacity.findSpareHosts(allNodes.asList(), nodeRepository.spareCount());
- this.nodeRepository = nodeRepository;
+ capacity.findSpareHosts(allNodes.asList(), spareCount);
+ this.nameResolver = nameResolver;
NodeList nodesInCluster = allNodes.owner(application).type(clusterSpec.type()).cluster(clusterSpec.id());
NodeList nonRetiredNodesInCluster = nodesInCluster.not().retired();
@@ -81,7 +83,7 @@ public class NodePrioritizer {
}
/** Collects all node candidates for this application and returns them in the most-to-least preferred order */
- List<NodeCandidate> collect(List<Node> surplusActiveNodes) {
+ public List<NodeCandidate> collect(List<Node> surplusActiveNodes) {
addApplicationNodes();
addSurplusNodes(surplusActiveNodes);
addReadyNodes();
@@ -131,7 +133,7 @@ public class NodePrioritizer {
if ( !canAllocateNew) return;
for (Node host : allNodes) {
- if ( ! nodeRepository.canAllocateTenantNodeTo(host)) continue;
+ if ( ! NodeRepository.canAllocateTenantNodeTo(host, dynamicProvisioning)) continue;
if (host.reservedTo().isPresent() && !host.reservedTo().get().equals(application.tenant())) continue;
if (host.reservedTo().isPresent() && application.instance().isTester()) continue;
if (host.exclusiveTo().isPresent()) continue; // Never allocate new nodes to exclusive hosts
@@ -143,7 +145,7 @@ public class NodePrioritizer {
host,
spareHosts.contains(host),
allNodes,
- nodeRepository));
+ nameResolver));
}
}
@@ -209,7 +211,7 @@ public class NodePrioritizer {
if (node.type() != NodeType.tenant || node.parentHostname().isEmpty()) return true;
Optional<Node> parent = allNodes.parentOf(node);
if (parent.isEmpty()) return false;
- return nodeRepository.canAllocateTenantNodeTo(parent.get());
+ return NodeRepository.canAllocateTenantNodeTo(parent.get(), dynamicProvisioning);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 2833c4e11ba..292a6872bb0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -17,7 +17,7 @@ import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
-import com.yahoo.vespa.flags.custom.HostCapacity;
+import com.yahoo.vespa.flags.custom.ClusterCapacity;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Address;
@@ -31,7 +31,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
-import org.junit.Ignore;
import org.junit.Test;
import java.time.Duration;
@@ -43,6 +42,7 @@ import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import java.util.stream.Stream;
import static com.yahoo.vespa.hosted.provision.maintenance.DynamicProvisioningMaintainerTest.MockHostProvisioner.Behaviour;
@@ -102,9 +102,22 @@ public class DynamicProvisioningMaintainerTest {
}
+ /**
+ * The "__legacy" variant: runs with COMPACT_PREPROVISION_CAPACITY explicitly set to false and a single
+ * ClusterCapacity(1, 1, 3, 2, 1.0) entry in PREPROVISION_CAPACITY. After one maintain() run it expects
+ * failed host "host2" to be gone from the node repository and exactly one host to have been deprovisioned.
+ * NOTE(review): the method name says "does not deprovision" while the assertions expect one deprovision;
+ * presumably the name refers to hosts other than the failed one -- confirm.
+ */
@Test
+ public void does_not_deprovision_when_preprovisioning_enabled__legacy() {
+ var tester = new DynamicProvisioningTester().addInitialNodes();
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1, 3, 2, 1.0)), ClusterCapacity.class);
+ tester.flagSource.withBooleanFlag(Flags.COMPACT_PREPROVISION_CAPACITY.id(), false);
+ Optional<Node> failedHost = tester.nodeRepository.getNode("host2");
+ assertTrue(failedHost.isPresent());
+
+ tester.maintainer.maintain();
+ assertTrue("Failed host is deprovisioned", tester.nodeRepository.getNode(failedHost.get().hostname()).isEmpty());
+ assertEquals(1, tester.hostProvisioner.deprovisionedHosts);
+ }
+
+ @Test
public void does_not_deprovision_when_preprovisioning_enabled() {
var tester = new DynamicProvisioningTester().addInitialNodes();
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(1, 3, 2, 1)), HostCapacity.class);
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1, 3, 2, 1.0)), ClusterCapacity.class);
Optional<Node> failedHost = tester.nodeRepository.getNode("host2");
assertTrue(failedHost.isPresent());
@@ -114,14 +127,15 @@ public class DynamicProvisioningMaintainerTest {
}
@Test
- public void provision_deficit_and_deprovision_excess() {
+ public void provision_deficit_and_deprovision_excess__legacy() {
var tester = new DynamicProvisioningTester().addInitialNodes();
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(),
- List.of(new HostCapacity(24, 64, 100, 2),
- new HostCapacity(16, 24, 100, 1)),
- HostCapacity.class);
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 48, 128, 1000, 10.0),
+ new ClusterCapacity(1, 16, 24, 100, 1.0)),
+ ClusterCapacity.class);
+ tester.flagSource.withBooleanFlag(Flags.COMPACT_PREPROVISION_CAPACITY.id(), false);
assertTrue(tester.nodeRepository.getNode("host2").isPresent());
- assertEquals(0 ,tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(0, tester.hostProvisioner.provisionedHosts.size());
// failed host2 is removed
Optional<Node> failedHost = tester.nodeRepository.getNode("host2");
@@ -131,11 +145,128 @@ public class DynamicProvisioningMaintainerTest {
assertTrue("Host with matching resources is kept", tester.nodeRepository.getNode("host3").isPresent());
// Two more hosts are provisioned with expected resources
- NodeResources resources = new NodeResources(24, 64, 100, 1);
+ NodeResources resources = new NodeResources(48, 128, 1000, 10);
assertEquals(2, tester.provisionedHostsMatching(resources));
}
+ /**
+ * Counterpart of provision_deficit_and_deprovision_excess__legacy that uses the same two ClusterCapacity
+ * entries but does not set COMPACT_PREPROVISION_CAPACITY. A single maintain() run is expected to
+ * provision two hosts matching 48-128-1000-10 (hostname100 and hostname101), remove failed host2 together
+ * with its child host2-1, keep host3, and leave the total node count at 11.
+ */
@Test
+ public void provision_deficit_and_deprovision_excess() {
+ var tester = new DynamicProvisioningTester().addInitialNodes();
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 48, 128, 1000, 10.0),
+ new ClusterCapacity(1, 16, 24, 100, 1.0)),
+ ClusterCapacity.class);
+
+ // Preconditions: nothing provisioned yet and the new hostnames are not in the repository
+ assertEquals(0, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(11, tester.nodeRepository.getNodes().size());
+ assertTrue(tester.nodeRepository.getNode("host2").isPresent());
+ assertTrue(tester.nodeRepository.getNode("host2-1").isPresent());
+ assertTrue(tester.nodeRepository.getNode("host3").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname100").isEmpty());
+ assertTrue(tester.nodeRepository.getNode("hostname101").isEmpty());
+
+ tester.maintainer.maintain();
+
+ assertEquals(2, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
+ List<Node> nodesAfter = tester.nodeRepository.getNodes();
+ assertEquals(11, nodesAfter.size()); // 2 removed, 2 added
+ assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.getNode("host2").isEmpty());
+ assertTrue("Node on deprovisioned host removed", tester.nodeRepository.getNode("host2-1").isEmpty());
+ assertTrue("Host satisfying 16-24-100-1 is kept", tester.nodeRepository.getNode("host3").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname100").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname101").isPresent());
+ }
+
+ /**
+ * Exercises preprovisioning when the mock provisioner is told to hand out "host4"-flavored hosts.
+ * Steps through four PREPROVISION_CAPACITY settings in sequence -- one 2-node cluster, two identical
+ * 2-node clusters, a second cluster changed to 24-64-100-1 nodes, and finally a single 1-node cluster --
+ * asserting the provisioned-host count and repository node count after each maintain() run.
+ */
+ @Test
+ public void preprovision_with_shared_host() {
+ var tester = new DynamicProvisioningTester().addInitialNodes();
+ // Makes provisioned hosts 48-128-1000-10
+ tester.hostProvisioner.provisionSharedHost("host4");
+
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 1, 30, 20, 3.0)),
+ ClusterCapacity.class);
+
+ assertEquals(0, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(11, tester.nodeRepository.getNodes().size());
+ assertTrue(tester.nodeRepository.getNode("host2").isPresent());
+ assertTrue(tester.nodeRepository.getNode("host2-1").isPresent());
+ assertTrue(tester.nodeRepository.getNode("host3").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname100").isEmpty());
+
+ // The first cluster will be allocated to host3 and a new host hostname100.
+ // hostname100 will be a large shared host specified above.
+ tester.maintainer.maintain();
+ verifyFirstMaintain(tester);
+
+ // Second maintain should be a no-op, otherwise we did wrong in the first maintain.
+ tester.maintainer.maintain();
+ verifyFirstMaintain(tester);
+
+ // Add a second cluster equal to the first. It should fit on existing host3 and hostname100.
+
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 1, 30, 20, 3.0),
+ new ClusterCapacity(2, 1, 30, 20, 3.0)),
+ ClusterCapacity.class);
+
+ tester.maintainer.maintain();
+ verifyFirstMaintain(tester);
+
+ // Change second cluster such that it doesn't fit on host3, but does on hostname100,
+ // and with a size of 2 it should allocate a new shared host.
+ // The node allocation code prefers to allocate to the shared hosts instead of host3 (at least
+ // in this test, due to skew), so host3 will be deprovisioned when hostname101 is provisioned.
+ // host3 is a 24-64-100-10 while hostname100 is 48-128-1000-10.
+
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 1, 30, 20, 3.0),
+ new ClusterCapacity(2, 24, 64, 100, 1.0)),
+ ClusterCapacity.class);
+
+ tester.maintainer.maintain();
+
+ assertEquals(2, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(2, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
+ assertEquals(10, tester.nodeRepository.getNodes().size()); // 3 removed, 2 added
+ assertTrue("preprovision capacity is prefered on shared hosts", tester.nodeRepository.getNode("host3").isEmpty());
+ assertTrue(tester.nodeRepository.getNode("hostname100").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname101").isPresent());
+
+ // If the preprovision capacity is reduced, we should see shared hosts deprovisioned.
+
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(1, 1, 30, 20, 3.0)),
+ ClusterCapacity.class);
+
+ tester.maintainer.maintain();
+
+ assertEquals("one provisioned host has been deprovisioned, so there are 2 -> 1 provisioned hosts",
+ 1, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
+ assertEquals(9, tester.nodeRepository.getNodes().size()); // 4 removed, 2 added
+ // Either of the two new hosts may be the survivor; exactly one of them must remain.
+ if (tester.nodeRepository.getNode("hostname100").isPresent()) {
+ assertTrue("hostname101 is superfluous and should have been deprovisioned",
+ tester.nodeRepository.getNode("hostname101").isEmpty());
+ } else {
+ assertTrue("hostname101 is required for preprovision capacity",
+ tester.nodeRepository.getNode("hostname101").isPresent());
+ }
+
+ }
+
+ /**
+ * Asserts the state preprovision_with_shared_host expects after its first maintain() run:
+ * exactly one provisioned host (matching 48-128-1000-10), 10 nodes in the repository,
+ * host2 and host2-1 removed, and host3 and hostname100 present.
+ */
+ private void verifyFirstMaintain(DynamicProvisioningTester tester) {
+ assertEquals(1, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10)));
+ assertEquals(10, tester.nodeRepository.getNodes().size()); // 2 removed, 1 added
+ assertTrue("Failed host 'host2' is deprovisioned", tester.nodeRepository.getNode("host2").isEmpty());
+ assertTrue("Node on deprovisioned host removed", tester.nodeRepository.getNode("host2-1").isEmpty());
+ assertTrue("One 1-30-20-3 node fits on host3", tester.nodeRepository.getNode("host3").isPresent());
+ assertTrue("New 48-128-1000-10 host added", tester.nodeRepository.getNode("hostname100").isPresent());
+ }
+
+ @Test
public void does_not_remove_if_host_provisioner_failed() {
var tester = new DynamicProvisioningTester();
Node host2 = tester.addNode("host2", Optional.empty(), NodeType.host, Node.State.failed, DynamicProvisioningTester.tenantApp);
@@ -145,58 +276,95 @@ public class DynamicProvisioningMaintainerTest {
assertTrue(tester.nodeRepository.getNode(host2.hostname()).isPresent());
}
- @Ignore // TODO (hakon): Enable as test of min-capacity specified in flag
@Test
- public void provision_exact_capacity() {
- var tester = new DynamicProvisioningTester(Cloud.builder().dynamicProvisioning(true).build());
- NodeResources resources1 = new NodeResources(24, 64, 100, 1);
- NodeResources resources2 = new NodeResources(16, 24, 100, 1);
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1),
- new HostCapacity(resources2.vcpu(), resources2.memoryGb(), resources2.diskGb(), 2)),
- HostCapacity.class);
+ public void test_minimum_capacity() {
+ var tester = new DynamicProvisioningTester();
+ NodeResources resources1 = new NodeResources(24, 64, 100, 10);
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), resources1.bandwidthGbps())),
+ ClusterCapacity.class);
tester.maintainer.maintain();
// Hosts are provisioned
- assertEquals(1, tester.provisionedHostsMatching(resources1));
- assertEquals(2, tester.provisionedHostsMatching(resources2));
+ assertEquals(2, tester.provisionedHostsMatching(resources1));
+ assertEquals(0, tester.hostProvisioner.deprovisionedHosts);
// Next maintenance run does nothing
tester.assertNodesUnchanged();
- // Target capacity is changed
- NodeResources resources3 = new NodeResources(48, 128, 1000, 1);
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1),
- new HostCapacity(resources3.vcpu(), resources3.memoryGb(), resources3.diskGb(), 1)),
- HostCapacity.class);
+ // Pretend shared-host flag has been set to host4's flavor
+ var sharedHostNodeResources = new NodeResources(48, 128, 1000, 10, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote);
+ tester.hostProvisioner.provisionSharedHost("host4");
- // Excess hosts are deprovisioned
- tester.maintainer.maintain();
- assertEquals(1, tester.provisionedHostsMatching(resources1));
- assertEquals(0, tester.provisionedHostsMatching(resources2));
- assertEquals(1, tester.provisionedHostsMatching(resources3));
- assertEquals(2, tester.nodeRepository.getNodes(Node.State.deprovisioned).size());
+ // Next maintenance run does nothing
+ tester.assertNodesUnchanged();
+
+ // Must be able to allocate 2 nodes with "no resource requirement"
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(2, 0, 0, 0, 0.0)),
+ ClusterCapacity.class);
+
+ // Next maintenance run does nothing
+ tester.assertNodesUnchanged();
// Activate hosts
- tester.maintainer.maintain(); // Resume provisioning of new hosts
List<Node> provisioned = tester.nodeRepository.list().state(Node.State.provisioned).asList();
tester.nodeRepository.setReady(provisioned, Agent.system, this.getClass().getSimpleName());
tester.provisioningTester.activateTenantHosts();
// Allocating nodes to a host does not result in provisioning of additional capacity
ApplicationId application = ProvisioningTester.applicationId();
+ NodeResources applicationNodeResources = new NodeResources(4, 8, 50, 0.1);
tester.provisioningTester.deploy(application,
- Capacity.from(new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1))));
+ Capacity.from(new ClusterResources(2, 1, applicationNodeResources)));
assertEquals(2, tester.nodeRepository.list().owner(application).size());
tester.assertNodesUnchanged();
// Clearing flag does nothing
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(), HostCapacity.class);
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(), ClusterCapacity.class);
+ tester.assertNodesUnchanged();
+
+ // Increasing the capacity provisions additional hosts
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(3, 0, 0, 0, 0.0)),
+ ClusterCapacity.class);
+ assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources));
+ assertTrue(tester.nodeRepository.getNode("hostname102").isEmpty());
+ tester.maintainer.maintain();
+ assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources));
+ assertTrue(tester.nodeRepository.getNode("hostname102").isPresent());
+
+ // Next maintenance run does nothing
tester.assertNodesUnchanged();
- // Capacity reduction does not remove host with children
- tester.flagSource.withListFlag(Flags.TARGET_CAPACITY.id(), List.of(new HostCapacity(resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), 1)),
- HostCapacity.class);
+ // Requiring >0 capacity does nothing as long as it fits on the 3 hosts
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(3,
+ resources1.vcpu() - applicationNodeResources.vcpu(),
+ resources1.memoryGb() - applicationNodeResources.memoryGb(),
+ resources1.diskGb() - applicationNodeResources.diskGb(),
+ resources1.bandwidthGbps() - applicationNodeResources.bandwidthGbps())),
+ ClusterCapacity.class);
tester.assertNodesUnchanged();
+
+ // But requiring a bit more in the cluster => provisioning of 2 shared hosts.
+ tester.flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(),
+ List.of(new ClusterCapacity(3,
+ resources1.vcpu() - applicationNodeResources.vcpu() + 1,
+ resources1.memoryGb() - applicationNodeResources.memoryGb() + 1,
+ resources1.diskGb() - applicationNodeResources.diskGb() + 1,
+ resources1.bandwidthGbps())),
+ ClusterCapacity.class);
+
+ assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources));
+ assertTrue(tester.nodeRepository.getNode("hostname102").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname103").isEmpty());
+ assertTrue(tester.nodeRepository.getNode("hostname104").isEmpty());
+ tester.maintainer.maintain();
+ assertEquals(3, tester.provisionedHostsMatching(sharedHostNodeResources));
+ assertTrue(tester.nodeRepository.getNode("hostname102").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname103").isPresent());
+ assertTrue(tester.nodeRepository.getNode("hostname104").isPresent());
}
@Test
@@ -225,9 +393,7 @@ public class DynamicProvisioningMaintainerTest {
private static final ApplicationId proxyApp = ApplicationId.from("vespa", "proxy", "default");
private static final NodeFlavors flavors = FlavorConfigBuilder.createDummies("default", "docker", "host2", "host3", "host4");
- private final InMemoryFlagSource flagSource = new InMemoryFlagSource().withListFlag(Flags.TARGET_CAPACITY.id(),
- List.of(),
- HostCapacity.class);
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
private final NodeRepository nodeRepository;
private final MockHostProvisioner hostProvisioner;
@@ -260,9 +426,10 @@ public class DynamicProvisioningMaintainerTest {
List.of(createNode("host1", Optional.empty(), NodeType.host, Node.State.active, Optional.of(tenantHostApp)),
createNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)),
createNode("host1-2", Optional.of("host1"), NodeType.tenant, Node.State.failed, Optional.empty()),
- createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, Optional.of(tenantApp)),
+ createNode("host2", Optional.empty(), NodeType.host, Node.State.failed, Optional.of(tenantHostApp)),
createNode("host2-1", Optional.of("host2"), NodeType.tenant, Node.State.failed, Optional.empty()),
- createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty()),
+ createNode("host3", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty(),
+ "host3-1", "host3-2", "host3-3", "host3-4", "host3-5"),
createNode("host4", Optional.empty(), NodeType.host, Node.State.provisioned, Optional.empty()),
createNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)),
createNode("proxyhost1", Optional.empty(), NodeType.proxyhost, Node.State.provisioned, Optional.empty()),
@@ -281,8 +448,9 @@ public class DynamicProvisioningMaintainerTest {
return nodeRepository.database().addNodesInState(List.of(node), node.state(), Agent.system).get(0);
}
- private Node createNode(String hostname, Optional<String> parentHostname, NodeType nodeType, Node.State state, Optional<ApplicationId> application) {
- Flavor flavor = nodeRepository.flavors().getFlavor(parentHostname.isPresent() ? "docker" : "host2").orElseThrow();
+ private Node createNode(String hostname, Optional<String> parentHostname, NodeType nodeType,
+ Node.State state, Optional<ApplicationId> application, String... additionalHostnames) {
+ Flavor flavor = nodeRepository.flavors().getFlavor(parentHostname.isPresent() ? "docker" : "host3").orElseThrow();
Optional<Allocation> allocation = application
.map(app -> new Allocation(
app,
@@ -290,8 +458,9 @@ public class DynamicProvisioningMaintainerTest {
flavor.resources(),
Generation.initial(),
false));
+ List<Address> addresses = Stream.of(additionalHostnames).map(Address::new).collect(Collectors.toList());
Node.Builder builder = Node.create("fake-id-" + hostname, hostname, flavor, state, nodeType)
- .ipConfigWithEmptyPool(state == Node.State.active ? Set.of("::1") : Set.of());
+ .ipConfig(new IP.Config(state == Node.State.active ? Set.of("::1") : Set.of(), Set.of(), addresses));
parentHostname.ifPresent(builder::parentHostname);
allocation.ifPresent(builder::allocation);
return builder.build();
@@ -299,7 +468,7 @@ public class DynamicProvisioningMaintainerTest {
+ /**
+ * Counts the hosts recorded in hostProvisioner.provisionedHosts whose generated host resources
+ * are compatibleWith the given resources (previously an exact equals on nodeResources()).
+ */
private long provisionedHostsMatching(NodeResources resources) {
return hostProvisioner.provisionedHosts.stream()
- .filter(host -> host.nodeResources().equals(resources))
+ .filter(host -> host.generateHost().resources().compatibleWith(resources))
.count();
}
@@ -319,27 +488,35 @@ public class DynamicProvisioningMaintainerTest {
private int deprovisionedHosts = 0;
private EnumSet<Behaviour> behaviours = EnumSet.noneOf(Behaviour.class);
+ private Optional<Flavor> provisionHostFlavor = Optional.empty();
+ /** Creates a mock host provisioner that keeps the given flavors and name resolver for later use. */
public MockHostProvisioner(NodeFlavors flavors, MockNameResolver nameResolver) {
this.flavors = flavors;
this.nameResolver = nameResolver;
}
+ /**
+ * Pins the flavor used by provisionHosts to the flavor with the given name, overriding the
+ * resource-based flavor lookup. The flavor is resolved immediately via getFlavorOrThrow.
+ *
+ * @param flavorName name of the flavor to provision from now on
+ * @return this provisioner, to allow chaining
+ */
+ public MockHostProvisioner provisionSharedHost(String flavorName) {
+ provisionHostFlavor = Optional.of(flavors.getFlavorOrThrow(flavorName));
+ return this;
+ }
+
@Override
public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndexes, NodeResources resources,
ApplicationId applicationId, Version osVersion, HostSharing sharing) {
- Flavor hostFlavor = flavors.getFlavors().stream()
- .filter(f -> !f.isDocker())
- .filter(f -> f.resources().compatibleWith(resources))
- .findFirst()
- .orElseThrow(() -> new IllegalArgumentException("No host flavor found satisfying " + resources));
+ Flavor hostFlavor = provisionHostFlavor
+ .orElseGet(() -> flavors.getFlavors().stream()
+ .filter(f -> !f.isDocker())
+ .filter(f -> f.resources().compatibleWith(resources))
+ .findFirst()
+ .orElseThrow(() -> new IllegalArgumentException("No host flavor found satisfying " + resources)));
+
List<ProvisionedHost> hosts = new ArrayList<>();
for (int index : provisionIndexes) {
hosts.add(new ProvisionedHost("host" + index,
"hostname" + index,
hostFlavor,
Optional.empty(),
- List.of(new Address("nodename" + index)),
+ createAddressesForHost(hostFlavor, index),
resources,
osVersion));
}
@@ -347,6 +524,13 @@ public class DynamicProvisioningMaintainerTest {
return hosts;
}
+ /**
+ * Builds the address list for a provisioned host: one Address per Gbps of the flavor's bandwidth
+ * (rounded to the nearest whole number, but at least one), named "nodename" + hostIndex + "_" + i.
+ */
+ private List<Address> createAddressesForHost(Flavor flavor, int hostIndex) {
+ long numAddresses = Math.max(1, Math.round(flavor.resources().bandwidthGbps()));
+ return IntStream.range(0, (int) numAddresses)
+ .mapToObj(i -> new Address("nodename" + hostIndex + "_" + i))
+ .collect(Collectors.toList());
+ }
+
@Override
public List<Node> provision(Node host, Set<Node> children) throws FatalProvisioningException {
if (behaviours.contains(Behaviour.failProvisioning)) throw new FatalProvisioningException("Failed to provision node(s)");