aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2020-03-27 12:57:10 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2020-03-27 12:57:10 +0100
commitbcaf74cc7cddd26f315ea9c60ceb8a5f9b665168 (patch)
treee45f781aaedd60fbc95413dd331031be4145556e /node-repository
parente63f7068d3716ef7aa174d6ae7c9a5a5dd754ee3 (diff)
Maintain application min, max and target resources
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java37
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java24
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java46
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java34
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java84
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java10
7 files changed, 193 insertions, 48 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 3cf2442f6f7..4ba480b73b1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -19,6 +19,7 @@ import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
@@ -98,6 +99,7 @@ public class NodeRepository extends AbstractComponent {
private final FirmwareChecks firmwareChecks;
private final DockerImages dockerImages;
private final JobControl jobControl;
+ private final Applications applications;
/**
* Creates a node repository from a zookeeper provider.
@@ -124,6 +126,7 @@ public class NodeRepository extends AbstractComponent {
this.firmwareChecks = new FirmwareChecks(db, clock);
this.dockerImages = new DockerImages(db, dockerImage);
this.jobControl = new JobControl(db);
+ this.applications = new Applications();
// read and write all nodes to make sure they are stored in the latest version of the serialized format
for (State state : State.values())
@@ -154,6 +157,9 @@ public class NodeRepository extends AbstractComponent {
/** Returns the status of maintenance jobs managed by this. */
public JobControl jobControl() { return jobControl; }
+ /** Returns this node repo's view of the applications deployed to it */
+ public Applications applications() { return applications; }
+
// ---------------- Query API ----------------------------------------------------------------
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java
new file mode 100644
index 00000000000..7dd2dc7be17
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java
@@ -0,0 +1,37 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.applications;
+
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.transaction.Mutex;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Optional;
+
+/**
+ * The node repository's view of an application deployment.
+ *
+ * This is mutable and not thread safe: Hold the application lock on read-modify-write.
+ *
+ * @author bratseth
+ */
+public class Application {
+
+    private final Map<ClusterSpec.Id, Cluster> clusters = new HashMap<>();
+
+    /** Returns the cluster with the given id or null if none */
+    public Cluster cluster(ClusterSpec.Id id) { return clusters.get(id); }
+
+    /**
+     * Sets the min and max resource limits of the given cluster, replacing any limits set earlier.
+     * This will create the cluster with these limits if it does not exist.
+     * If the cluster has a target which is not inside the new limits, the target is removed.
+     */
+    public void setClusterLimits(ClusterSpec.Id id, ClusterResources min, ClusterResources max, Mutex applicationLock) {
+        Optional<ClusterResources> target = Optional.ofNullable(clusters.get(id)).flatMap(Cluster::targetResources);
+        // Always store the new limits: an existing cluster must not keep those of an earlier deployment
+        clusters.put(id, new Cluster(min, max, target.filter(t -> t.isWithin(min, max))));
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java
new file mode 100644
index 00000000000..1409857df1a
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java
@@ -0,0 +1,24 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.applications;
+
+import com.yahoo.config.provision.ApplicationId;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * An (in-memory, for now) repository of the node repo's view of applications.
+ *
+ * This is multithread safe.
+ *
+ * @author bratseth
+ */
+public class Applications {
+
+    private final ConcurrentHashMap<ApplicationId, Application> applications = new ConcurrentHashMap<>();
+
+    /** Returns the application with the given id, creating it first if it is absent and create is true, or null if absent otherwise */
+    public Application get(ApplicationId applicationId, boolean create) {
+        return create ? applications.computeIfAbsent(applicationId, id -> new Application()) : applications.get(applicationId);
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
new file mode 100644
index 00000000000..d4f7f10abfd
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -0,0 +1,46 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.applications;
+
+import com.yahoo.config.provision.ClusterResources;
+
+import java.util.Optional;
+
+/**
+ * The node repo's view of a cluster in an application deployment.
+ *
+ * This is immutable, and must be locked with the application lock on read-modify-write.
+ *
+ * @author bratseth
+ */
+public class Cluster {
+
+    private final ClusterResources minResources;
+    private final ClusterResources maxResources;
+    private final Optional<ClusterResources> targetResources;
+
+    /** Creates a cluster with the given limits and target. Package private: not constructible outside this package. */
+    Cluster(ClusterResources min, ClusterResources max, Optional<ClusterResources> target) {
+        minResources = min;
+        maxResources = max;
+        targetResources = target;
+    }
+
+    /** Returns the lower resource limit configured for this cluster */
+    public ClusterResources minResources() { return minResources; }
+
+    /** Returns the upper resource limit configured for this cluster */
+    public ClusterResources maxResources() { return maxResources; }
+
+    /**
+     * Returns the computed resources (between min and max, inclusive) this cluster should
+     * have allocated at the moment, or empty if the system currently has no opinion on this.
+     */
+    public Optional<ClusterResources> targetResources() { return targetResources; }
+
+    /** Returns a copy of this cluster with the same limits and the given target resources */
+    public Cluster withTarget(ClusterResources target) { return new Cluster(minResources, maxResources, Optional.of(target)); }
+
+    /** Returns a copy of this cluster with the same limits and no target resources */
+    public Cluster withoutTarget() { return new Cluster(minResources, maxResources, Optional.empty()); }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
index a23c1a932d4..d1cd4752c93 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeResources;
@@ -31,42 +32,37 @@ public class CapacityPolicies {
this.isUsingAdvertisedResources = zone.cloud().value().equals("aws");
}
- public int decideSize(Capacity capacity, ClusterSpec cluster, ApplicationId application) {
- int requestedNodes = capacity.minResources().nodes();
-
+ public int decideSize(int requested, Capacity capacity, ClusterSpec cluster, ApplicationId application) {
if (application.instance().isTester()) return 1;
- ensureRedundancy(requestedNodes, cluster, capacity.canFail());
-
- if (capacity.isRequired()) return requestedNodes;
-
+ ensureRedundancy(requested, cluster, capacity.canFail());
+ if (capacity.isRequired()) return requested;
switch(zone.environment()) {
case dev : case test : return 1;
- case perf : return Math.min(capacity.minResources().nodes(), 3);
- case staging: return requestedNodes <= 1 ? requestedNodes : Math.max(2, requestedNodes / 10);
- case prod : return requestedNodes;
+ case perf : return Math.min(requested, 3);
+ case staging: return requested <= 1 ? requested : Math.max(2, requested / 10);
+ case prod : return requested;
default : throw new IllegalArgumentException("Unsupported environment " + zone.environment());
}
}
- public NodeResources decideNodeResources(Capacity capacity, ClusterSpec cluster) {
- NodeResources resources = capacity.minResources().nodeResources();
- if (resources == NodeResources.unspecified)
- resources = defaultNodeResources(cluster.type());
- ensureSufficientResources(resources, cluster);
+ public NodeResources decideNodeResources(NodeResources requested, Capacity capacity, ClusterSpec cluster) {
+ if (requested == NodeResources.unspecified)
+ requested = defaultNodeResources(cluster.type());
+ ensureSufficientResources(requested, cluster);
- if (capacity.isRequired()) return resources;
+ if (capacity.isRequired()) return requested;
// Allow slow storage in zones which are not performance sensitive
if (zone.system().isCd() || zone.environment() == Environment.dev || zone.environment() == Environment.test)
- resources = resources.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any);
+ requested = requested.with(NodeResources.DiskSpeed.any).with(NodeResources.StorageType.any);
// Dev does not cap the cpu of containers since usage is spotty: Allocate just a small amount exclusively
// Do not cap in AWS as hosts are allocated on demand and 1-to-1, so the node can use the entire host
if (zone.environment() == Environment.dev && !zone.region().value().contains("aws-"))
- resources = resources.withVcpu(0.1);
+ requested = requested.withVcpu(0.1);
- return resources;
+ return requested;
}
private void ensureSufficientResources(NodeResources resources, ClusterSpec cluster) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 2c2c927034b..083f8db5aa5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.google.inject.Inject;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Environment;
@@ -15,10 +16,14 @@ import com.yahoo.config.provision.ProvisionLogger;
import com.yahoo.config.provision.Provisioner;
import com.yahoo.config.provision.Zone;
import com.yahoo.log.LogLevel;
+import com.yahoo.transaction.Mutex;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.applications.Application;
+import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter;
import com.yahoo.vespa.hosted.provision.node.filter.NodeHostFilter;
@@ -87,38 +92,36 @@ public class NodeRepositoryProvisioner implements Provisioner {
* The nodes are ordered by increasing index number.
*/
@Override
- public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requestedCapacity,
+ public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested,
ProvisionLogger logger) {
- if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group");
-
log.log(zone.system().isCd() ? Level.INFO : LogLevel.DEBUG,
- () -> "Received deploy prepare request for " + requestedCapacity +
+ () -> "Received deploy prepare request for " + requested +
" for application " + application + ", cluster " + cluster);
- int effectiveGroups;
- NodeSpec requestedNodes;
- NodeResources resources = requestedCapacity.minResources().nodeResources();
- if ( requestedCapacity.type() == NodeType.tenant) {
- int nodeCount = capacityPolicies.decideSize(requestedCapacity, cluster, application);
- if (zone.environment().isManuallyDeployed() && nodeCount < requestedCapacity.minResources().nodes())
- logger.log(Level.INFO, "Requested " + requestedCapacity.minResources().nodes() + " nodes for " + cluster +
- ", downscaling to " + nodeCount + " nodes in " + zone.environment());
- resources = capacityPolicies.decideNodeResources(requestedCapacity, cluster);
- boolean exclusive = capacityPolicies.decideExclusivity(cluster.isExclusive());
- effectiveGroups = Math.min(requestedCapacity.minResources().groups(), nodeCount); // cannot have more groups than nodes
- requestedNodes = NodeSpec.from(nodeCount, resources, exclusive, requestedCapacity.canFail());
+ if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group");
- if ( ! hasQuota(application, nodeCount))
- throw new IllegalArgumentException(requestedCapacity + " requested for " + cluster +
- (requestedCapacity.minResources().nodes() != nodeCount ? " resolved to " + nodeCount + " nodes" : "") +
- " exceeds your quota. Resolve this at https://cloud.vespa.ai/quota");
+ if ( ! hasQuota(application, requested.maxResources().nodes()))
+ throw new IllegalArgumentException(requested + " requested for " + cluster +
+ ". Max value exceeds your quota. Resolve this at https://cloud.vespa.ai/quota");
+
+ int groups;
+ NodeResources resources;
+ NodeSpec nodeSpec;
+ if ( requested.type() == NodeType.tenant) {
+ ClusterResources target = decideTargetResources(application, cluster.id(), requested);
+ int nodeCount = capacityPolicies.decideSize(target.nodes(), requested, cluster, application);
+ resources = capacityPolicies.decideNodeResources(target.nodeResources(), requested, cluster);
+ boolean exclusive = capacityPolicies.decideExclusivity(cluster.isExclusive());
+ groups = Math.min(target.groups(), nodeCount); // cannot have more groups than nodes
+ nodeSpec = NodeSpec.from(nodeCount, resources, exclusive, requested.canFail());
+ logIfDownscaled(target.nodes(), nodeCount, cluster, logger);
}
else {
- requestedNodes = NodeSpec.from(requestedCapacity.type());
- effectiveGroups = 1; // type request with multiple groups is not supported
+ groups = 1; // type request with multiple groups is not supported
+ resources = requested.minResources().nodeResources();
+ nodeSpec = NodeSpec.from(requested.type());
}
-
- return asSortedHosts(preparer.prepare(application, cluster, requestedNodes, effectiveGroups), resources);
+ return asSortedHosts(preparer.prepare(application, cluster, nodeSpec, groups), resources);
}
@Override
@@ -138,6 +141,39 @@ public class NodeRepositoryProvisioner implements Provisioner {
loadBalancerProvisioner.ifPresent(lbProvisioner -> lbProvisioner.deactivate(application, transaction));
}
+    /**
+     * Stores the requested min and max as the cluster limits and returns the target cluster resources:
+     * the stored target if any, otherwise the current resources if within limits, otherwise the requested min.
+     */
+    private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity requested) {
+        try (Mutex lock = nodeRepository.lock(applicationId)) {
+            Application application = nodeRepository.applications().get(applicationId, true);
+            application.setClusterLimits(clusterId, requested.minResources(), requested.maxResources(), lock);
+            // Fall back lazily so the active nodes are only read when no target is stored for this cluster
+            return application.cluster(clusterId).targetResources()
+                    .orElseGet(() -> currentResources(applicationId, clusterId, requested).orElse(requested.minResources()));
+        }
+    }
+
+    /** Returns the resources of the active, non-retired nodes of this cluster, or empty if there are none or they are outside the requested limits */
+    private Optional<ClusterResources> currentResources(ApplicationId applicationId,
+                                                        ClusterSpec.Id clusterId,
+                                                        Capacity requested) {
+        List<Node> activeNodes = NodeList.copyOf(nodeRepository.getNodes(applicationId, Node.State.active))
+                                         .cluster(clusterId).not().retired().asList();
+        if (activeNodes.isEmpty()) return Optional.empty();
+        int groupCount = (int) activeNodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
+        // The node resources are read from the first node only
+        ClusterResources current = new ClusterResources(activeNodes.size(), groupCount, activeNodes.get(0).flavor().resources());
+        return current.isWithin(requested.minResources(), requested.maxResources()) ? Optional.of(current) : Optional.empty();
+    }
+
+    /** Logs at INFO to the given provision logger when a manually deployed environment gets fewer nodes than targeted */
+    private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) {
+        if ( ! zone.environment().isManuallyDeployed() || actualNodes >= targetNodes) return;
+        logger.log(Level.INFO, "Requested " + targetNodes + " nodes for " + cluster + ", downscaling to " + actualNodes + " nodes in " + zone.environment());
+    }
+
private boolean hasQuota(ApplicationId application, int requestedNodes) {
if ( ! this.zone.system().isPublic()) return true; // no quota management
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index f88cb839946..76258e86de9 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -493,7 +493,7 @@ public class ProvisioningTest {
fail("Expected exception");
}
catch (IllegalArgumentException e) {
- assertEquals("6 nodes with [vcpu: 1.0, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 4.0 Gbps] requested for content cluster 'content0' 6.42 exceeds your quota. Resolve this at https://cloud.vespa.ai/quota",
+ assertEquals("6 nodes with [vcpu: 1.0, memory: 4.0 Gb, disk 10.0 Gb, bandwidth: 4.0 Gbps] requested for content cluster 'content0' 6.42. Max value exceeds your quota. Resolve this at https://cloud.vespa.ai/quota",
e.getMessage());
}
}
@@ -772,10 +772,10 @@ public class ProvisioningTest {
allHosts.addAll(content1);
Function<Integer, Capacity> capacity = count -> Capacity.from(new ClusterResources(count, 1, NodeResources.unspecified), required, true);
- int expectedContainer0Size = tester.capacityPolicies().decideSize(capacity.apply(container0Size), containerCluster0, application);
- int expectedContainer1Size = tester.capacityPolicies().decideSize(capacity.apply(container1Size), containerCluster1, application);
- int expectedContent0Size = tester.capacityPolicies().decideSize(capacity.apply(content0Size), contentCluster0, application);
- int expectedContent1Size = tester.capacityPolicies().decideSize(capacity.apply(content1Size), contentCluster1, application);
+ int expectedContainer0Size = tester.capacityPolicies().decideSize(container0Size, capacity.apply(container0Size), containerCluster0, application);
+ int expectedContainer1Size = tester.capacityPolicies().decideSize(container1Size, capacity.apply(container1Size), containerCluster1, application);
+ int expectedContent0Size = tester.capacityPolicies().decideSize(content0Size, capacity.apply(content0Size), contentCluster0, application);
+ int expectedContent1Size = tester.capacityPolicies().decideSize(content1Size, capacity.apply(content1Size), contentCluster1, application);
assertEquals("Hosts in each group cluster is disjunct and the total number of unretired nodes is correct",
expectedContainer0Size + expectedContainer1Size + expectedContent0Size + expectedContent1Size,