summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-04-21 20:47:51 +0200
committerGitHub <noreply@github.com>2021-04-21 20:47:51 +0200
commitdaa7010d1d953e7c18f74e9e46755d9a250a353a (patch)
tree263f79baf896a617f320409c6dc8bacb0a71c967
parentbc3caa587f166d02e90b8bb81a1656a6cdd32715 (diff)
parentf85a43cfbffd05096eb968a37ef480780be49512 (diff)
Merge pull request #17514 from vespa-engine/bratseth/redundancy
Bratseth/redundancy
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java8
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java42
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java10
-rw-r--r--config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java114
-rw-r--r--container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java1
-rw-r--r--container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java12
7 files changed, 151 insertions, 40 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
index 53df9045dfe..0f824fcacf9 100644
--- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
+++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
@@ -207,7 +207,7 @@ public class InMemoryProvisioner implements HostProvisioner {
}
int nextIndex = nextIndexInCluster.getOrDefault(new Pair<>(clusterGroup.type(), clusterGroup.id()), startIndex);
- while (allocation.size() < nodesInGroup) {
+ while (nonRetiredIn(allocation).size() < nodesInGroup) {
// Find the smallest host that can fit the requested resources
Optional<NodeResources> hostResources = freeNodes.keySet().stream()
.sorted(new MemoryDiskCpu())
@@ -232,12 +232,16 @@ public class InMemoryProvisioner implements HostProvisioner {
}
nextIndexInCluster.put(new Pair<>(clusterGroup.type(), clusterGroup.id()), nextIndex);
- while (allocation.size() > nodesInGroup)
+ while (nonRetiredIn(allocation).size() > nodesInGroup)
allocation.remove(0);
return allocation;
}
+ private List<HostSpec> nonRetiredIn(List<HostSpec> hosts) {
+ return hosts.stream().filter(host -> ! retiredHostNames.contains(host.hostname())).collect(Collectors.toList());
+ }
+
private int totalAllocatedTo(ClusterSpec cluster) {
int count = 0;
for (Map.Entry<ClusterSpec, List<HostSpec>> allocation : allocations.entrySet()) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
index 706d9a01c82..ba968411393 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java
@@ -29,12 +29,16 @@ public class Redundancy implements StorDistributionConfig.Producer, ProtonConfig
this.totalNodes = totalNodes;
}
+ /** Returns the final redundancy per group */
public int finalRedundancy() { return effectiveFinalRedundancy()/groups; }
+
public int readyCopies() { return effectiveReadyCopies()/groups; }
public int groups() { return groups; }
public int totalNodes() { return totalNodes; }
public int effectiveInitialRedundancy() { return Math.min(totalNodes, initialRedundancy * groups); }
+
+ /** Returns the final redundancy over all groups */
public int effectiveFinalRedundancy() { return Math.min(totalNodes, finalRedundancy * groups); }
public int effectiveReadyCopies() { return Math.min(totalNodes, readyCopies * groups); }
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
index 3c0f8996c22..f7ee09f6dec 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
@@ -143,10 +143,10 @@ public class StorageGroup {
}
public int getNumberOfLeafGroups() {
- int count = subgroups.isEmpty() ? 1 : 0;
- for (StorageGroup g : subgroups) {
+ if (subgroups.isEmpty()) return 1;
+ int count = 0;
+ for (StorageGroup g : subgroups)
count += g.getNumberOfLeafGroups();
- }
return count;
}
@@ -203,6 +203,10 @@ public class StorageGroup {
}
public StorageGroup buildRootGroup(DeployState deployState, RedundancyBuilder redundancyBuilder, ContentCluster owner) {
+ Optional<Integer> maxRedundancy = Optional.empty();
+ if (owner.isHosted())
+ maxRedundancy = validateRedundancyAndGroups();
+
Optional<ModelElement> group = Optional.ofNullable(clusterElement.child("group"));
Optional<ModelElement> nodes = getNodes(clusterElement);
@@ -215,7 +219,9 @@ public class StorageGroup {
StorageGroup storageGroup = (owner.isHosted())
? groupBuilder.buildHosted(deployState, owner, Optional.empty())
: groupBuilder.buildNonHosted(deployState, owner, Optional.empty());
- Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(), storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes());
+ Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(),
+ storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes(),
+ maxRedundancy);
owner.setRedundancy(redundancy);
if (storageGroup.partitions.isEmpty() && (redundancy.groups() > 1)) {
storageGroup.partitions = Optional.of(computePartitions(redundancy.finalRedundancy(), redundancy.groups()));
@@ -223,6 +229,29 @@ public class StorageGroup {
return storageGroup;
}
+ private Optional<Integer> validateRedundancyAndGroups() {
+ var redundancyElement = clusterElement.child("redundancy");
+ if (redundancyElement == null) return Optional.empty();
+ long redundancy = redundancyElement.asLong();
+
+ var nodesElement = clusterElement.child("nodes");
+ if (nodesElement == null) return Optional.empty();
+ var nodesSpec = NodesSpecification.from(nodesElement, context);
+
+ int minNodesPerGroup = (int)Math.ceil((double)nodesSpec.minResources().nodes() / nodesSpec.minResources().groups());
+
+ if (minNodesPerGroup < redundancy) { // TODO: Fail on this on Vespa 8, and simplify
+ context.getDeployLogger().log(Level.WARNING,
+ "Cluster '" + clusterElement.stringAttribute("id") + "' " +
+ "specifies redundancy " + redundancy + " but cannot be higher than " +
+ "the minimum nodes per group, which is " + minNodesPerGroup);
+ return Optional.of(minNodesPerGroup);
+ }
+ else {
+ return Optional.empty();
+ }
+ }
+
/** This returns a partition string which specifies equal distribution between all groups */
// TODO: Make a partitions object
static private String computePartitions(int redundancyPerGroup, int numGroups) {
@@ -393,7 +422,8 @@ public class StorageGroup {
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG),
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG_STACKTRACE));
- List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get()) : Collections.emptyList();
+ List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get())
+ : List.of();
List<XmlNodeBuilder> explicitNodes = new ArrayList<>();
explicitNodes.addAll(collectExplicitNodes(groupElement));
@@ -407,7 +437,7 @@ public class StorageGroup {
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
else if (nodesElement.isPresent() && context.getDeployState().isHosted() && context.getDeployState().zone().environment().isManuallyDeployed() ) // default to 1 node
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
- else if (! nodesElement.isPresent() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node
+ else if (nodesElement.isEmpty() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node
nodeRequirement = Optional.of(NodesSpecification.nonDedicated(1, context));
else // Nodes or groups explicitly listed - resolve in GroupBuilder
nodeRequirement = Optional.empty();
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
index 2e97cdabd73..669d4ff6a1d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
@@ -5,6 +5,8 @@ import com.yahoo.vespa.model.builder.xml.dom.ModelElement;
import com.yahoo.vespa.model.content.IndexedHierarchicDistributionValidator;
import com.yahoo.vespa.model.content.Redundancy;
+import java.util.Optional;
+
/**
* Builds redundancy config for a content cluster.
*/
@@ -37,7 +39,13 @@ public class RedundancyBuilder {
}
}
}
- public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes) {
+ public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes,
+ Optional<Integer> maxRedundancy) {
+ if (maxRedundancy.isPresent()) {
+ initialRedundancy = Math.min(initialRedundancy, maxRedundancy.get());
+ finalRedundancy = Math.min(finalRedundancy, maxRedundancy.get());
+ readyCopies = Math.min(readyCopies, maxRedundancy.get());
+ }
if (isHosted) {
return new Redundancy(initialRedundancy, finalRedundancy, readyCopies, leafGroups, totalNodes);
} else {
diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
index 5a06379a1c2..c6ddc6b0c36 100644
--- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
+++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
@@ -777,7 +777,7 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 10;
+ int numberOfHosts = 11;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
VespaModel model = tester.createModel(services, true, "node-1-3-10-09");
@@ -785,9 +785,9 @@ public class ModelProvisioningTest {
// Check slobroks clusters
assertEquals("Includes retired node", 1+3, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("node-1-3-10-07", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName());
assertEquals("Included in addition because it is retired", "node-1-3-10-09", model.getAdmin().getSlobroks().get(3).getHostName());
}
@@ -802,19 +802,19 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 10;
+ int numberOfHosts = 12;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
- VespaModel model = tester.createModel(services, true, "node-1-3-10-01", "node-1-3-10-02");
- assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+ VespaModel model = tester.createModel(services, true, "node-1-3-10-03", "node-1-3-10-04");
+ assertEquals(10+2, model.getRoot().hostSystem().getHosts().size());
// Check slobroks clusters
assertEquals("Includes retired node", 3+2, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-09", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(3).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-01", model.getAdmin().getSlobroks().get(4).getHostName());
+ assertEquals("node-1-3-10-12", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(3).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName());
}
@Test
@@ -831,22 +831,22 @@ public class ModelProvisioningTest {
" </container>" +
"</services>";
- int numberOfHosts = 13;
+ int numberOfHosts = 16;
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
- VespaModel model = tester.createModel(services, true, "node-1-3-10-12", "node-1-3-10-03", "node-1-3-10-02");
+ VespaModel model = tester.createModel(services, true, "node-1-3-10-15", "node-1-3-10-05", "node-1-3-10-04");
assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts));
// Check slobroks clusters
// ... from cluster default
- assertEquals("Includes retired node", 3+3, model.getAdmin().getSlobroks().size());
- assertEquals("node-1-3-10-13", model.getAdmin().getSlobroks().get(0).getHostName());
- assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-12", model.getAdmin().getSlobroks().get(2).getHostName());
+ assertEquals("Includes retired node", 7, model.getAdmin().getSlobroks().size());
+ assertEquals("node-1-3-10-16", model.getAdmin().getSlobroks().get(0).getHostName());
+ assertEquals("node-1-3-10-14", model.getAdmin().getSlobroks().get(1).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-15", model.getAdmin().getSlobroks().get(2).getHostName());
// ... from cluster bar
- assertEquals("node-1-3-10-01", model.getAdmin().getSlobroks().get(3).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName());
- assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(5).getHostName());
+ assertEquals("node-1-3-10-03", model.getAdmin().getSlobroks().get(3).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-05", model.getAdmin().getSlobroks().get(5).getHostName());
+ assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(6).getHostName());
}
@Test
@@ -967,7 +967,7 @@ public class ModelProvisioningTest {
VespaModelTester tester = new VespaModelTester();
tester.addHosts(numberOfHosts);
VespaModel model = tester.createModel(services, false);
- assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts));
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
assertEquals(2*3, cluster.redundancy().effectiveInitialRedundancy()); // Reduced from 3*3
@@ -997,6 +997,72 @@ public class ModelProvisioningTest {
}
@Test
+ public void testRedundancyWithGroupsTooHighRedundancyAndOneRetiredNode() {
+ String services =
+ "<?xml version='1.0' encoding='utf-8' ?>" +
+ "<services>" +
+ " <content version='1.0' id='bar'>" +
+ " <redundancy>2</redundancy>" + // Should have been illegal since we only have 1 node per group
+ " <documents>" +
+ " <document type='type1' mode='index'/>" +
+ " </documents>" +
+ " <nodes count='2' groups='2'/>" +
+ " </content>" +
+ "</services>";
+
+ int numberOfHosts = 3;
+ VespaModelTester tester = new VespaModelTester();
+ tester.addHosts(numberOfHosts);
+ VespaModel model = tester.createModel(services, false, "node-1-3-10-03");
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+
+ ContentCluster cluster = model.getContentClusters().get("bar");
+ assertEquals(2, cluster.redundancy().effectiveInitialRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveFinalRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveReadyCopies());
+ assertEquals("1|*", cluster.getRootGroup().getPartitions().get());
+ assertEquals(0, cluster.getRootGroup().getNodes().size());
+ assertEquals(2, cluster.getRootGroup().getSubgroups().size());
+ System.out.println("Nodes in group 0: ");
+ cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n));
+ System.out.println("Nodes in group 1: ");
+ cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n));
+ }
+
+ @Test
+ public void testRedundancyWithGroupsAndThreeRetiredNodes() {
+ String services =
+ "<?xml version='1.0' encoding='utf-8' ?>" +
+ "<services>" +
+ " <content version='1.0' id='bar'>" +
+ " <redundancy>1</redundancy>" +
+ " <documents>" +
+ " <document type='type1' mode='index'/>" +
+ " </documents>" +
+ " <nodes count='2' groups='2'/>" +
+ " </content>" +
+ "</services>";
+
+ int numberOfHosts = 5;
+ VespaModelTester tester = new VespaModelTester();
+ tester.addHosts(numberOfHosts);
+ VespaModel model = tester.createModel(services, false, "node-1-3-10-05", "node-1-3-10-04", "node-1-3-10-03");
+ assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
+
+ ContentCluster cluster = model.getContentClusters().get("bar");
+ assertEquals(2, cluster.redundancy().effectiveInitialRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveFinalRedundancy());
+ assertEquals(2, cluster.redundancy().effectiveReadyCopies());
+ assertEquals("1|*", cluster.getRootGroup().getPartitions().get());
+ assertEquals(0, cluster.getRootGroup().getNodes().size());
+ assertEquals(2, cluster.getRootGroup().getSubgroups().size());
+ System.out.println("Nodes in group 0: ");
+ cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n));
+ System.out.println("Nodes in group 1: ");
+ cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n));
+ }
+
+ @Test
public void testUsingNodesCountAttributesAndGettingTooFewNodes() {
String services =
"<?xml version='1.0' encoding='utf-8' ?>" +
@@ -1841,12 +1907,12 @@ public class ModelProvisioningTest {
"<services>" +
" <container version='1.0' id='zk'>" +
" <zookeeper/>" +
- " <nodes count='4'/>" + // (3 + 1 retired)
+ " <nodes count='3'/>" +
" </container>" +
"</services>";
VespaModelTester tester = new VespaModelTester();
tester.addHosts(4);
- VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-01");
+ VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-04");
ApplicationContainerCluster cluster = model.getContainerClusters().get("zk");
assertEquals(1, cluster.getContainers().stream().filter(Container::isRetired).count());
assertEquals(3, cluster.getContainers().stream().filter(c -> !c.isRetired()).count());
diff --git a/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java b/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java
index 5f6201c6f2d..fc8904bee7f 100644
--- a/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java
+++ b/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java
@@ -169,7 +169,6 @@ public class ProcessorLibrary {
List<FutureResponse> futureResponses = new ArrayList<>(chains.size());
for (Chain<? extends Processor> chain : chains) {
-
futureResponses.add(new AsyncExecution(chain, execution).process(request.clone()));
}
AsyncExecution.waitForAll(futureResponses, 1000);
diff --git a/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java b/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java
index 182ec7568f3..28ad202c4f5 100644
--- a/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java
@@ -48,20 +48,20 @@ public class AsyncExecutionOfOneChainTestCase {
private class ParallelExecutor extends Searcher {
/** The number of parallel executions */
- private static final int parallelism=2;
+ private static final int parallelism = 2;
@Override
public Result search(Query query, Execution execution) {
- List<FutureResult> futureResults=new ArrayList<>(parallelism);
- for (int i=0; i<parallelism; i++)
+ List<FutureResult> futureResults = new ArrayList<>(parallelism);
+ for (int i = 0; i < parallelism; i++)
futureResults.add(new AsyncExecution(execution).search(query.clone()));
- Result mainResult=execution.search(query);
+ Result mainResult = execution.search(query);
// Add hits from other threads
AsyncExecution.waitForAll(futureResults,query.getTimeLeft());
for (FutureResult futureResult : futureResults) {
- Result result=futureResult.get();
+ Result result = futureResult.get();
mainResult.mergeWith(result);
mainResult.hits().addAll(result.hits().asList());
}
@@ -72,7 +72,7 @@ public class AsyncExecutionOfOneChainTestCase {
private static class RegularProvider extends Searcher {
- private AtomicInteger counter=new AtomicInteger();
+ private final AtomicInteger counter = new AtomicInteger();
@Override
public Result search(Query query,Execution execution) {