summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-04-26 12:25:20 +0200
committerJon Bratseth <bratseth@gmail.com>2020-04-26 12:25:20 +0200
commita7ca18c57e6fdfce0d06db7efb41f87a2049206e (patch)
tree70aff2a2bf9bc1358f10e8caf02501f606dea259
parent8846aa2ddc4efffddf77f6b92e1b8a012b318232 (diff)
Smarter window change handling
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java7
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java54
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/MultigroupProvisioningTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java56
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java25
6 files changed, 131 insertions, 15 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
index 3c7a14b9496..f723575c342 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java
@@ -72,7 +72,12 @@ public final class Capacity {
/** Create a non-required, failable capacity request */
public static Capacity from(ClusterResources resources) {
- return from(resources, false, true);
+ return from(resources, resources);
+ }
+
+ /** Create a non-required, failable capacity request with the given resource limits */
+ public static Capacity from(ClusterResources min, ClusterResources max) {
+ return from(min, max, false, true);
}
public static Capacity from(ClusterResources resources, boolean required, boolean canFail) {
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
index 97549e851ad..3a230c89732 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java
@@ -29,7 +29,7 @@ public final class ClusterSpec {
this.type = type;
this.id = id;
this.groupId = groupId;
- this.vespaVersion = vespaVersion;
+ this.vespaVersion = Objects.requireNonNull(vespaVersion);
this.exclusive = exclusive;
// TODO(mpolden): Require combinedId to always be present for type combined after April 2020
if (type != Type.combined && combinedId.isPresent()) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index e3b7c62cc0d..9ffc4d0efac 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -137,32 +137,62 @@ public class NodeRepositoryProvisioner implements Provisioner {
application = application.withClusterLimits(clusterId, requested.minResources(), requested.maxResources());
nodeRepository.applications().put(application, lock);
return application.clusters().get(clusterId).targetResources()
- .orElseGet(() -> currentResources(applicationId, clusterId, requested)
- .orElse(requested.minResources()));
+ .orElseGet(() -> currentResources(applicationId, clusterId, requested));
}
}
- /** Returns the current resources of this cluster, if it's already deployed and inside the requested limits */
- private Optional<ClusterResources> currentResources(ApplicationId applicationId,
- ClusterSpec.Id clusterId,
- Capacity requested) {
+ /** Returns the current resources of this cluster, or the closest resources within the requested limits */
+ private ClusterResources currentResources(ApplicationId applicationId,
+ ClusterSpec.Id clusterId,
+ Capacity requested) {
List<Node> nodes = NodeList.copyOf(nodeRepository.getNodes(applicationId, Node.State.active))
.cluster(clusterId)
.not().retired()
.not().removable()
.asList();
- if (nodes.isEmpty()) return Optional.empty();
- long groups = nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
+ if (nodes.isEmpty()) return requested.minResources(); // New deployment: Start at min
- // To allow non-numeric settings to be updated without resetting to the min target, we need to use
- // the non-numeric settings of the current min limit with the current numeric settings
+ long groups = nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count();
+ // Settings which are not autoscaled should always be taken from the currently requested capacity
+ // and for those min and max are always the same
NodeResources nodeResources = nodes.get(0).allocation().get().requestedResources()
+ .withBandwidthGbps(requested.minResources().nodeResources().bandwidthGbps())
.with(requested.minResources().nodeResources().diskSpeed())
.with(requested.minResources().nodeResources().storageType());
var currentResources = new ClusterResources(nodes.size(), (int)groups, nodeResources);
- if ( ! currentResources.isWithin(requested.minResources(), requested.maxResources())) return Optional.empty();
+ return ensureWithin(requested.minResources(), requested.maxResources(), currentResources);
+ }
+
+ /** Make the minimal adjustments needed to the current resources to stay within the limits */
+ private ClusterResources ensureWithin(ClusterResources min, ClusterResources max, ClusterResources current) {
+ int nodes = between(min.nodes(), max.nodes(), current.nodes());
+ int groups = between(min.groups(), max.groups(), current.groups());
+ if (nodes % groups != 0) {
+ // That didn't work - try to preserve current group size instead.
+ // Rounding here is needed because a node may be missing due to node failing.
+ int currentGroupsSize = Math.round((float)current.nodes() / current.groups());
+ nodes = currentGroupsSize * groups;
+ if (nodes != between(min.nodes(), max.nodes(), nodes)) {
+ // Give up: Use max
+ nodes = max.nodes();
+ groups = max.groups();
+ }
+ }
+ double vcpu = between(min.nodeResources().vcpu(), max.nodeResources().vcpu(), current.nodeResources().vcpu());
+ double memoryGb = between(min.nodeResources().memoryGb(), max.nodeResources().memoryGb(), current.nodeResources().memoryGb());
+ double diskGb = between(min.nodeResources().diskGb(), max.nodeResources().diskGb(), current.nodeResources().diskGb());
+ NodeResources nodeResources = current.nodeResources().withVcpu(vcpu)
+ .withMemoryGb(memoryGb)
+ .withDiskGb(diskGb);
+ return new ClusterResources(nodes, groups, nodeResources);
+ }
+
+ private int between(int min, int max, int n) {
+ return Math.min(max, Math.max(min, n));
+ }
- return Optional.of(currentResources);
+ private double between(double min, double max, double n) {
+ return Math.min(max, Math.max(min, n));
}
private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/MultigroupProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/MultigroupProvisioningTest.java
index 1265961e351..050f3b7e865 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/MultigroupProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/MultigroupProvisioningTest.java
@@ -109,7 +109,7 @@ public class MultigroupProvisioningTest {
}
@Test
- public void test_one_node_and_group_to_two_with_flavor_migration() {
+ public void test_one_node_and_group_to_two_with_resource_change() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).build();
ApplicationId application1 = tester.makeApplicationId();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 8fc73d68785..1b7c75a03ac 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -405,6 +405,58 @@ public class ProvisioningTest {
tester.activate(application, state.allHosts);
}
+ @Test
+ public void test_changing_limits() {
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
+ tester.makeReadyHosts(30, new NodeResources(20, 40, 100, 4));
+ tester.deployZoneApp();
+
+ ApplicationId app1 = tester.makeApplicationId("app1");
+ ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+
+ // Initial deployment
+ tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 2, 10, 20),
+ resources(8, 4, 4, 20, 40)));
+ tester.assertNodes("Initial allocation at min",
+ 4, 2, 2, 10, 20,
+ app1, cluster1);
+
+ // Move window above current allocation
+ tester.activate(app1, cluster1, Capacity.from(resources(8, 4, 4, 20, 40),
+ resources(10, 5, 5, 25, 50)));
+ tester.assertNodes("New allocation at new min",
+ 8, 4, 4, 20, 40,
+ app1, cluster1);
+
+ // Move window below current allocation
+ tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 2, 10, 20),
+ resources(6, 3, 3, 15, 25)));
+ tester.assertNodes("New allocation at new max",
+ 6, 3, 3, 15, 25,
+ app1, cluster1);
+
+ // Widening window does not change allocation
+ tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 1, 5, 15),
+ resources(8, 4, 4, 20, 30)));
+ tester.assertNodes("Same allocation",
+ 6, 3, 3, 15, 25,
+ app1, cluster1);
+
+ // Changing limits in opposite directions cause a mixture of min and max
+ tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 10, 30, 5),
+ resources(4, 2, 14, 40, 10)));
+ tester.assertNodes("A mix of min and max",
+ 4, 2, 10, 30, 10,
+ app1, cluster1);
+
+ // Changing group size
+ tester.activate(app1, cluster1, Capacity.from(resources(6, 3, 8, 25, 5),
+ resources(9, 3, 12, 35, 15)));
+ tester.assertNodes("Groups changed",
+ 6, 3, 10, 30, 10,
+ app1, cluster1);
+ }
+
@Test(expected = IllegalArgumentException.class)
public void prod_deployment_requires_redundancy() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
@@ -920,4 +972,8 @@ public class ProvisioningTest {
.isPresent();
}
+ private ClusterResources resources(int nodes, int groups, double vcpu, double memory, double disk) {
+ return new ClusterResources(nodes, groups, new NodeResources(vcpu, memory, disk, 0.1));
+ }
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index a8df47aab1a..cb4509b3de7 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -149,6 +149,10 @@ public class ProvisioningTester {
return hosts1;
}
+ public Collection<HostSpec> activate(ApplicationId application, ClusterSpec cluster, Capacity capacity) {
+ return activate(application, prepare(application, cluster, capacity, true));
+ }
+
public Collection<HostSpec> activate(ApplicationId application, Collection<HostSpec> hosts) {
NestedTransaction transaction = new NestedTransaction();
transaction.add(new CuratorTransaction(curator));
@@ -197,6 +201,22 @@ public class ProvisioningTester {
}
}
+ /** Assert on the current *non retired* nodes */
+ public void assertNodes(String explanation, int nodes, int groups, double vcpu, double memory, double disk,
+ ApplicationId app, ClusterSpec cluster) {
+ List<Node> nodeList = nodeRepository.list().owner(app).cluster(cluster.id()).not().retired().asList();
+ assertEquals(explanation + ": Node count",
+ nodes,
+ nodeList.size());
+ assertEquals(explanation + ": Group count",
+ groups,
+ nodeList.stream().map(n -> n.allocation().get().membership().cluster().group().get()).distinct().count());
+ for (Node node : nodeList)
+ assertEquals(explanation + ": Resources",
+ new NodeResources(vcpu, memory, disk, 0.1),
+ node.flavor().resources());
+ }
+
public void fail(HostSpec host) {
int beforeFailCount = nodeRepository.getNode(host.hostname(), Node.State.active).get().status().failCount();
Node failedNode = nodeRepository.fail(host.hostname(), Agent.system, "Failing to unit test");
@@ -241,6 +261,11 @@ public class ProvisioningTester {
return makeReadyNodes(n, flavor, NodeType.tenant);
}
+ /** Call deployZoneApp() after this before deploying applications */
+ public List<Node> makeReadyHosts(int n, NodeResources resources) {
+ return makeReadyNodes(n, resources, NodeType.host, 5);
+ }
+
public List<Node> makeReadyNodes(int n, NodeResources resources) {
return makeReadyNodes(n, resources, NodeType.tenant, 0);
}