summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com>, 2021-02-03 18:46:20 +0100
committer: Jon Bratseth <bratseth@gmail.com>, 2021-02-03 18:46:20 +0100
commit: fcb4c2aa12eae4befe7ba9d11aeabbef6d889015 (patch)
tree: 3772d1d2a0d3e130ac2171a836f6f1871ad1f582
parent: 16f6e4fbcdd3c57010427ef0ad4f46d219c3f77d (diff)
Scale content clusters to minimum 3 nodes
There is no cluster controller redundancy with 2 nodes and this leads to operational problems.
-rw-r--r-- config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java | 1
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java | 22
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java | 8
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java | 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java | 22
6 files changed, 51 insertions, 8 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java
index d4c52b97f45..a13128b8776 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterResources.java
@@ -31,6 +31,7 @@ public class ClusterResources {
public NodeResources nodeResources() { return nodeResources; }
public ClusterResources with(NodeResources resources) { return new ClusterResources(nodes, groups, resources); }
+ public ClusterResources withNodes(int nodes) { return new ClusterResources(nodes, groups, nodeResources); } // New instance with the given node count; groups and node resources are carried over from this
public ClusterResources withGroups(int groups) { return new ClusterResources(nodes, groups, nodeResources); }
/** Returns true if this is smaller than the given resources in any dimension */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index bfb3bfeb480..03617eeefd8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -16,7 +16,8 @@ import java.util.Optional;
public class AllocationOptimizer {
// The min and max nodes to consider when not using application supplied limits
- private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2
+ private static final int minimumStatelessNodes = 2; // Since this number includes redundancy it cannot be lower than 2
+ private static final int minimumStatefulNodes = 3; // Leader election requires 3 nodes to have redundancy
private static final int maximumNodes = 150;
// When a query is issued on a node the cost is the sum of a fixed cost component and a cost component
@@ -40,9 +41,13 @@ public class AllocationOptimizer {
public Optional<AllocatableClusterResources> findBestAllocation(ResourceTarget target,
AllocatableClusterResources current,
Limits limits) {
+ int minimumNodes = current.clusterSpec().isStateful() ? minimumStatefulNodes : minimumStatelessNodes;
if (limits.isEmpty())
limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()),
new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()));
+ else
+ limits = atLeast(minimumNodes, limits);
+
Optional<AllocatableClusterResources> bestAllocation = Optional.empty();
NodeList hosts = nodeRepository.list().hosts();
for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) {
@@ -57,7 +62,9 @@ public class AllocationOptimizer {
ClusterResources next = new ClusterResources(nodes,
groups,
- nodeResourcesWith(nodesAdjustedForRedundancy, groupsAdjustedForRedundancy, limits, current, target));
+ nodeResourcesWith(nodesAdjustedForRedundancy,
+ groupsAdjustedForRedundancy,
+ limits, current, target));
var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, hosts, nodeRepository);
if (allocatableResources.isEmpty()) continue;
@@ -73,7 +80,11 @@ public class AllocationOptimizer {
* For the observed load this instance is initialized with, returns the resources needed per node to be at
* ideal load given a target node count
*/
- private NodeResources nodeResourcesWith(int nodes, int groups, Limits limits, AllocatableClusterResources current, ResourceTarget target) {
+ private NodeResources nodeResourcesWith(int nodes,
+ int groups,
+ Limits limits,
+ AllocatableClusterResources current,
+ ResourceTarget target) {
// Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size)
// Memory and disk: Scales with group size
double cpu, memory, disk;
@@ -103,4 +114,9 @@ public class AllocationOptimizer {
return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk);
}
+ /**
+ * Returns a copy of the given limits where the minimum nodes are at least the given value.
+ * If the maximum nodes of the given limits is lower than the given value the limits are
+ * returned unchanged, since raising the minimum above the maximum would invert the limits.
+ *
+ * @param nodes the lowest minimum node count to allow
+ * @param limits the limits to adjust
+ * @return limits whose minimum node count is max(nodes, current minimum) when the maximum allows it,
+ *         otherwise the given limits as-is
+ */
+ private Limits atLeast(int nodes, Limits limits) {
+ if (limits.max().nodes() < nodes) return limits; // The requested minimum is not allowed by the max limit
+ return limits.withMin(limits.min().withNodes(Math.max(nodes, limits.min().nodes())));
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java
index 15bf4427346..80ad81f6cdf 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Limits.java
@@ -38,6 +38,14 @@ public class Limits {
return max;
}
+ public Limits withMin(ClusterResources min) { // Returns new limits with the given min and the max of this
+ return new Limits(min, max);
+ }
+
+ public Limits withMax(ClusterResources max) { // Returns new limits with the min of this and the given max
+ return new Limits(min, max);
+ }
+
/** Caps the given resources at the limits of this. If it is empty the node resources are returned as-is */
public NodeResources cap(NodeResources resources) {
if (isEmpty()) return resources;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
index f368f4d139c..0246ec524e3 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
@@ -279,7 +279,7 @@ public class DockerProvisioningTest {
tester.makeReadyHosts(2, hostFlavor.resources()).activateTenantHosts();
ApplicationId app1 = ProvisioningTester.applicationId("app1");
- ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+ ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.container, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
var resources = new NodeResources(1, 8, 10, 1);
tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, resources),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
index 919d02c435c..37aeff82ed6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerProvisionTest.java
@@ -249,14 +249,14 @@ public class DynamicDockerProvisionTest {
tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 2, 20, 40),
resources(4, 1, 2, 20, 40)));
tester.assertNodes("Allocation specifies memory in the advertised amount",
- 2, 1, 2, 20, 40,
+ 3, 1, 2, 20, 40,
app1, cluster1);
// Redeploy the same
tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 2, 20, 40),
resources(4, 1, 2, 20, 40)));
tester.assertNodes("Allocation specifies memory in the advertised amount",
- 2, 1, 2, 20, 40,
+ 3, 1, 2, 20, 40,
app1, cluster1);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index be7d2656d13..611c3839f56 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -440,7 +440,7 @@ public class ProvisioningTest {
}
@Test
- public void test_node_limits_only() {
+ public void test_node_limits_only_container() {
Flavor hostFlavor = new Flavor(new NodeResources(20, 40, 100, 4));
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))
.flavors(List.of(hostFlavor))
@@ -448,7 +448,7 @@ public class ProvisioningTest {
tester.makeReadyHosts(4, hostFlavor.resources()).activateTenantHosts();
ApplicationId app1 = ProvisioningTester.applicationId("app1");
- ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+ ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.container, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, NodeResources.unspecified()),
new ClusterResources(4, 1, NodeResources.unspecified())));
@@ -458,6 +458,24 @@ public class ProvisioningTest {
}
@Test
+ public void test_node_limits_only_content() { // Content-cluster counterpart of test_node_limits_only_container
+ Flavor hostFlavor = new Flavor(new NodeResources(20, 40, 100, 4));
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))
+ .flavors(List.of(hostFlavor))
+ .build();
+ tester.makeReadyHosts(4, hostFlavor.resources()).activateTenantHosts();
+
+ ApplicationId app1 = ProvisioningTester.applicationId("app1");
+ ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, NodeResources.unspecified()), // Requested min: 2 nodes in 1 group
+ new ClusterResources(4, 1, NodeResources.unspecified()))); // Requested max: 4 nodes in 1 group
+ tester.assertNodes("Initial allocation at (allowable) min with default resources",
+ 3, 1, 1.5, 8, 50, 0.3, // 3 nodes in 1 group are expected even though the requested min is 2
+ app1, cluster1);
+ }
+
+ @Test
public void test_changing_limits() {
Flavor hostFlavor = new Flavor(new NodeResources(20, 40, 100, 4));
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))