summaryrefslogtreecommitdiffstats
path: root/config-model
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-04-21 20:47:51 +0200
committerGitHub <noreply@github.com>2021-04-21 20:47:51 +0200
commitdaa7010d1d953e7c18f74e9e46755d9a250a353a (patch)
tree263f79baf896a617f320409c6dc8bacb0a71c967 /config-model
parentbc3caa587f166d02e90b8bb81a1656a6cdd32715 (diff)
parentf85a43cfbffd05096eb968a37ef480780be49512 (diff)
Merge pull request #17514 from vespa-engine/bratseth/redundancy
Bratseth/redundancy
Diffstat (limited to 'config-model')
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java8
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java42
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java10
-rw-r--r--config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java114
5 files changed, 145 insertions, 33 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
index 53df9045dfe..0f824fcacf9 100644
--- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
+++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
@@ -207,7 +207,7 @@ public class InMemoryProvisioner implements HostProvisioner {
}
int nextIndex = nextIndexInCluster.getOrDefault(new Pair<>(clusterGroup.type(), clusterGroup.id()), startIndex);
- while (allocation.size() < nodesInGroup) {
+ while (nonRetiredIn(allocation).size() < nodesInGroup) {
// Find the smallest host that can fit the requested resources
Optional<NodeResources> hostResources = freeNodes.keySet().stream()
.sorted(new MemoryDiskCpu())
@@ -232,12 +232,16 @@ public class InMemoryProvisioner implements HostProvisioner {
}
nextIndexInCluster.put(new Pair<>(clusterGroup.type(), clusterGroup.id()), nextIndex);
- while (allocation.size() > nodesInGroup)
+ while (nonRetiredIn(allocation).size() > nodesInGroup)
allocation.remove(0);
return allocation;
}
+ private List<HostSpec> nonRetiredIn(List<HostSpec> hosts) {
+ return hosts.stream().filter(host -> ! retiredHostNames.contains(host.hostname())).collect(Collectors.toList());
+ }
+
private int totalAllocatedTo(ClusterSpec cluster) {
int count = 0;
for (Map.Entry<ClusterSpec, List<HostSpec>> allocation : allocations.entrySet()) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
index 706d9a01c82..ba968411393 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
@@ -29,12 +29,16 @@ public class Redundancy implements StorDistributionConfig.Producer, ProtonConfig
this.totalNodes = totalNodes;
}
+ /** Returns the final redundancy per group */
public int finalRedundancy() { return effectiveFinalRedundancy()/groups; }
+
public int readyCopies() { return effectiveReadyCopies()/groups; }
public int groups() { return groups; }
public int totalNodes() { return totalNodes; }
public int effectiveInitialRedundancy() { return Math.min(totalNodes, initialRedundancy * groups); }
+
+ /** Returns the final redundancy over all groups */
public int effectiveFinalRedundancy() { return Math.min(totalNodes, finalRedundancy * groups); }
public int effectiveReadyCopies() { return Math.min(totalNodes, readyCopies * groups); }
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
index 3c0f8996c22..f7ee09f6dec 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
@@ -143,10 +143,10 @@ public class StorageGroup {
}
public int getNumberOfLeafGroups() {
- int count = subgroups.isEmpty() ? 1 : 0;
- for (StorageGroup g : subgroups) {
+ if (subgroups.isEmpty()) return 1;
+ int count = 0;
+ for (StorageGroup g : subgroups)
count += g.getNumberOfLeafGroups();
- }
return count;
}
@@ -203,6 +203,10 @@ public class StorageGroup {
}
public StorageGroup buildRootGroup(DeployState deployState, RedundancyBuilder redundancyBuilder, ContentCluster owner) {
+ Optional<Integer> maxRedundancy = Optional.empty();
+ if (owner.isHosted())
+ maxRedundancy = validateRedundancyAndGroups();
+
Optional<ModelElement> group = Optional.ofNullable(clusterElement.child("group"));
Optional<ModelElement> nodes = getNodes(clusterElement);
@@ -215,7 +219,9 @@ public class StorageGroup {
StorageGroup storageGroup = (owner.isHosted())
? groupBuilder.buildHosted(deployState, owner, Optional.empty())
: groupBuilder.buildNonHosted(deployState, owner, Optional.empty());
- Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(), storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes());
+ Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(),
+ storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes(),
+ maxRedundancy);
owner.setRedundancy(redundancy);
if (storageGroup.partitions.isEmpty() && (redundancy.groups() > 1)) {
storageGroup.partitions = Optional.of(computePartitions(redundancy.finalRedundancy(), redundancy.groups()));
@@ -223,6 +229,29 @@ public class StorageGroup {
return storageGroup;
}
+ private Optional<Integer> validateRedundancyAndGroups() {
+ var redundancyElement = clusterElement.child("redundancy");
+ if (redundancyElement == null) return Optional.empty();
+ long redundancy = redundancyElement.asLong();
+
+ var nodesElement = clusterElement.child("nodes");
+ if (nodesElement == null) return Optional.empty();
+ var nodesSpec = NodesSpecification.from(nodesElement, context);
+
+ int minNodesPerGroup = (int)Math.ceil((double)nodesSpec.minResources().nodes() / nodesSpec.minResources().groups());
+
+ if (minNodesPerGroup < redundancy) { // TODO: Fail on this on Vespa 8, and simplify
+ context.getDeployLogger().log(Level.WARNING,
+ "Cluster '" + clusterElement.stringAttribute("id") + "' " +
+ "specifies redundancy " + redundancy + " but cannot be higher than " +
+ "the minimum nodes per group, which is " + minNodesPerGroup);
+ return Optional.of(minNodesPerGroup);
+ }
+ else {
+ return Optional.empty();
+ }
+ }
+
/** This returns a partition string which specifies equal distribution between all groups */
// TODO: Make a partitions object
static private String computePartitions(int redundancyPerGroup, int numGroups) {
@@ -393,7 +422,8 @@ public class StorageGroup {
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG),
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG_STACKTRACE));
- List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get()) : Collections.emptyList();
+ List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get())
+ : List.of();
List<XmlNodeBuilder> explicitNodes = new ArrayList<>();
explicitNodes.addAll(collectExplicitNodes(groupElement));
@@ -407,7 +437,7 @@ public class StorageGroup {
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
else if (nodesElement.isPresent() && context.getDeployState().isHosted() && context.getDeployState().zone().environment().isManuallyDeployed() ) // default to 1 node
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
- else if (! nodesElement.isPresent() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node
+ else if (nodesElement.isEmpty() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node
nodeRequirement = Optional.of(NodesSpecification.nonDedicated(1, context));
else // Nodes or groups explicitly listed - resolve in GroupBuilder
nodeRequirement = Optional.empty();
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
index 2e97cdabd73..669d4ff6a1d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
@@ -5,6 +5,8 @@ import com.yahoo.vespa.model.builder.xml.dom.ModelElement;
import com.yahoo.vespa.model.content.IndexedHierarchicDistributionValidator;
import com.yahoo.vespa.model.content.Redundancy;
+import java.util.Optional;
+
/**
* Builds redundancy config for a content cluster.
*/
@@ -37,7 +39,13 @@ public class RedundancyBuilder {
}
}
}
- public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes) {
+ public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes,
+ Optional<Integer> maxRedundancy) {
+ if (maxRedundancy.isPresent()) {
+ initialRedundancy = Math.min(initialRedundancy, maxRedundancy.get());
+ finalRedundancy = Math.min(finalRedundancy, maxRedundancy.get());
+ readyCopies = Math.min(readyCopies, maxRedundancy.get());
+ }
if (isHosted) {
return new Redundancy(initialRedundancy, finalRedundancy, readyCopies, leafGroups, totalNodes);
} else {
diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
index 5a06379a1c2..c6ddc6b0c36 100644
--- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
+++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
@@ -777,7 +777,7 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 10;
+ int numberOfHosts = 11;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
VespaModel model = tester.createModel(services, true, "node-1-3-10-09");
@@ -785,9 +785,9 @@ public class ModelProvisioningTest {
// Check slobroks clusters
assertEquals("Includes retired node", 1+3, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("node-1-3-10-07", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName());
assertEquals("Included in addition because it is retired", "node-1-3-10-09", model.getAdmin().getSlobroks().get(3).getHostName());
}
@@ -802,19 +802,19 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 10;
+ int numberOfHosts = 12;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
- VespaModel model = tester.createModel(services, true, "node-1-3-10-01", "node-1-3-10-02");
- assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+ VespaModel model = tester.createModel(services, true, "node-1-3-10-03", "node-1-3-10-04");
+ assertEquals(10+2, model.getRoot().hostSystem().getHosts().size());
// Check slobroks clusters
assertEquals("Includes retired node", 3+2, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-09", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(3).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-01", model.getAdmin().getSlobroks().get(4).getHostName());
+ assertEquals("node-1-3-10-12", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(3).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName());
}
@Test
@@ -831,22 +831,22 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 13;
+ int numberOfHosts = 16;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
- VespaModel model = tester.createModel(services, true, "node-1-3-10-12", "node-1-3-10-03", "node-1-3-10-02");
+ VespaModel model = tester.createModel(services, true, "node-1-3-10-15", "node-1-3-10-05", "node-1-3-10-04");
assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts));
// Check slobroks clusters
// ... from cluster default
- assertEquals("Includes retired node", 3+3, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-13", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-12", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("Includes retired node", 7, model.getAdmin().getSlobroks().size());
+ assertEquals("node-1-3-10-16", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-14", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-15", model.getAdmin().getSlobroks().get(2).getHostName());
// ... from cluster bar
- assertEquals("node-1-3-10-01", model.getAdmin().getSlobroks().get(3).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(5).getHostName());
+ assertEquals("node-1-3-10-03", model.getAdmin().getSlobroks().get(3).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-05", model.getAdmin().getSlobroks().get(5).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(6).getHostName());
}
@Test
@@ -967,7 +967,7 @@ public class ModelProvisioningTest {
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
VespaModel model = tester.createModel(services, false);
- assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts));
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
assertEquals(2*3, cluster.redundancy().effectiveInitialRedundancy()); // Reduced from 3*3
@@ -997,6 +997,72 @@ public class ModelProvisioningTest {
}
@Test
+ public void testRedundancyWithGroupsTooHighRedundancyAndOneRetiredNode() {
+ String services =
+ "<?xml version='1.0' encoding='utf-8' ?>" +
+ "<services>" +
+ " <content version='1.0' id='bar'>" +
+ " <redundancy>2</redundancy>" + // Should have been illegal since we only have 1 node per group
+ " <documents>" +
+ " <document type='type1' mode='index'/>" +
+ " </documents>" +
+ " <nodes count='2' groups='2'/>" +
+ " </content>" +
+ "</services>";
+
+ int numberOfHosts = 3;
+ VespaModelTester tester = new VespaModelTester();
+ tester.addHosts(numberOfHosts);
+ VespaModel model = tester.createModel(services, false, "node-1-3-10-03");
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+
+ ContentCluster cluster = model.getContentClusters().get("bar");
+ assertEquals(2, cluster.redundancy().effectiveInitialRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveFinalRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveReadyCopies());
+ assertEquals("1|*", cluster.getRootGroup().getPartitions().get());
+ assertEquals(0, cluster.getRootGroup().getNodes().size());
+ assertEquals(2, cluster.getRootGroup().getSubgroups().size());
+ System.out.println("Nodes in group 0: ");
+ cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n));
+ System.out.println("Nodes in group 1: ");
+ cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n));
+ }
+
+ @Test
+ public void testRedundancyWithGroupsAndThreeRetiredNodes() {
+ String services =
+ "<?xml version='1.0' encoding='utf-8' ?>" +
+ "<services>" +
+ " <content version='1.0' id='bar'>" +
+ " <redundancy>1</redundancy>" +
+ " <documents>" +
+ " <document type='type1' mode='index'/>" +
+ " </documents>" +
+ " <nodes count='2' groups='2'/>" +
+ " </content>" +
+ "</services>";
+
+ int numberOfHosts = 5;
+ VespaModelTester tester = new VespaModelTester();
+ tester.addHosts(numberOfHosts);
+ VespaModel model = tester.createModel(services, false, "node-1-3-10-05", "node-1-3-10-04", "node-1-3-10-03");
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+
+ ContentCluster cluster = model.getContentClusters().get("bar");
+ assertEquals(2, cluster.redundancy().effectiveInitialRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveFinalRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveReadyCopies());
+ assertEquals("1|*", cluster.getRootGroup().getPartitions().get());
+ assertEquals(0, cluster.getRootGroup().getNodes().size());
+ assertEquals(2, cluster.getRootGroup().getSubgroups().size());
+ System.out.println("Nodes in group 0: ");
+ cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n));
+ System.out.println("Nodes in group 1: ");
+ cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n));
+ }
+
+ @Test
public void testUsingNodesCountAttributesAndGettingTooFewNodes() {
String services =
"<?xml version='1.0' encoding='utf-8' ?>" +
@@ -1841,12 +1907,12 @@ public class ModelProvisioningTest {
"<services>" +
" <container version='1.0' id='zk'>" +
" <zookeeper/>" +
- " <nodes count='4'/>" + // (3 + 1 retired)
+ " <nodes count='3'/>" +
" </container>" +
"</services>";
VespaModelTester tester = new VespaModelTester();
tester.addHosts(4);
- VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-01");
+ VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-04");
ApplicationContainerCluster cluster = model.getContainerClusters().get("zk");
assertEquals(1, cluster.getContainers().stream().filter(Container::isRetired).count());
assertEquals(3, cluster.getContainers().stream().filter(c -> !c.isRetired()).count());