diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-04-21 20:47:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-21 20:47:51 +0200 |
commit | daa7010d1d953e7c18f74e9e46755d9a250a353a (patch) | |
tree | 263f79baf896a617f320409c6dc8bacb0a71c967 | |
parent | bc3caa587f166d02e90b8bb81a1656a6cdd32715 (diff) | |
parent | f85a43cfbffd05096eb968a37ef480780be49512 (diff) |
Merge pull request #17514 from vespa-engine/bratseth/redundancy
Bratseth/redundancy
7 files changed, 151 insertions, 40 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java index 53df9045dfe..0f824fcacf9 100644 --- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java +++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java @@ -207,7 +207,7 @@ public class InMemoryProvisioner implements HostProvisioner { } int nextIndex = nextIndexInCluster.getOrDefault(new Pair<>(clusterGroup.type(), clusterGroup.id()), startIndex); - while (allocation.size() < nodesInGroup) { + while (nonRetiredIn(allocation).size() < nodesInGroup) { // Find the smallest host that can fit the requested resources Optional<NodeResources> hostResources = freeNodes.keySet().stream() .sorted(new MemoryDiskCpu()) @@ -232,12 +232,16 @@ public class InMemoryProvisioner implements HostProvisioner { } nextIndexInCluster.put(new Pair<>(clusterGroup.type(), clusterGroup.id()), nextIndex); - while (allocation.size() > nodesInGroup) + while (nonRetiredIn(allocation).size() > nodesInGroup) allocation.remove(0); return allocation; } + private List<HostSpec> nonRetiredIn(List<HostSpec> hosts) { + return hosts.stream().filter(host -> ! retiredHostNames.contains(host.hostname())).collect(Collectors.toList()); + } + private int totalAllocatedTo(ClusterSpec cluster) { int count = 0; for (Map.Entry<ClusterSpec, List<HostSpec>> allocation : allocations.entrySet()) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java index 706d9a01c82..ba968411393 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Redundancy.java @@ -29,12 +29,16 @@ public class Redundancy implements StorDistributionConfig.Producer, ProtonConfig this.totalNodes = totalNodes; } + /** Returns the final redundancy per group */ public int finalRedundancy() { return effectiveFinalRedundancy()/groups; } + public int readyCopies() { return effectiveReadyCopies()/groups; } public int groups() { return groups; } public int totalNodes() { return totalNodes; } public int effectiveInitialRedundancy() { return Math.min(totalNodes, initialRedundancy * groups); } + + /** Returns the final redundancy over all groups */ public int effectiveFinalRedundancy() { return Math.min(totalNodes, finalRedundancy * groups); } public int effectiveReadyCopies() { return Math.min(totalNodes, readyCopies * groups); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java index 3c0f8996c22..f7ee09f6dec 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java @@ -143,10 +143,10 @@ public class StorageGroup { } public int getNumberOfLeafGroups() { - int count = subgroups.isEmpty() ? 1 : 0; - for (StorageGroup g : subgroups) { + if (subgroups.isEmpty()) return 1; + int count = 0; + for (StorageGroup g : subgroups) count += g.getNumberOfLeafGroups(); - } return count; } @@ -203,6 +203,10 @@ public class StorageGroup { } public StorageGroup buildRootGroup(DeployState deployState, RedundancyBuilder redundancyBuilder, ContentCluster owner) { + Optional<Integer> maxRedundancy = Optional.empty(); + if (owner.isHosted()) + maxRedundancy = validateRedundancyAndGroups(); + Optional<ModelElement> group = Optional.ofNullable(clusterElement.child("group")); Optional<ModelElement> nodes = getNodes(clusterElement); @@ -215,7 +219,9 @@ public class StorageGroup { StorageGroup storageGroup = (owner.isHosted()) ? groupBuilder.buildHosted(deployState, owner, Optional.empty()) : groupBuilder.buildNonHosted(deployState, owner, Optional.empty()); - Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(), storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes()); + Redundancy redundancy = redundancyBuilder.build(owner.getName(), owner.isHosted(), storageGroup.subgroups.size(), + storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes(), + maxRedundancy); owner.setRedundancy(redundancy); if (storageGroup.partitions.isEmpty() && (redundancy.groups() > 1)) { storageGroup.partitions = Optional.of(computePartitions(redundancy.finalRedundancy(), redundancy.groups())); @@ -223,6 +229,29 @@ public class StorageGroup { return storageGroup; } + private Optional<Integer> validateRedundancyAndGroups() { + var redundancyElement = clusterElement.child("redundancy"); + if (redundancyElement == null) return Optional.empty(); + long redundancy = redundancyElement.asLong(); + + var nodesElement = clusterElement.child("nodes"); + if (nodesElement == null) return Optional.empty(); + var nodesSpec = NodesSpecification.from(nodesElement, context); + + int minNodesPerGroup = (int)Math.ceil((double)nodesSpec.minResources().nodes() / nodesSpec.minResources().groups()); + + if (minNodesPerGroup < redundancy) { // TODO: Fail on this on Vespa 8, and simplify + context.getDeployLogger().log(Level.WARNING, + "Cluster '" + clusterElement.stringAttribute("id") + "' " + + "specifies redundancy " + redundancy + " but cannot be higher than " + + "the minimum nodes per group, which is " + minNodesPerGroup); + return Optional.of(minNodesPerGroup); + } + else { + return Optional.empty(); + } + } + /** This returns a partition string which specifies equal distribution between all groups */ // TODO: Make a partitions object static private String computePartitions(int redundancyPerGroup, int numGroups) { @@ -393,7 +422,8 @@ public class StorageGroup { childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG), childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG_STACKTRACE)); - List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get()) : Collections.emptyList(); + List<GroupBuilder> subGroups = groupElement.isPresent() ? collectSubGroups(isHosted, group, groupElement.get()) + : List.of(); List<XmlNodeBuilder> explicitNodes = new ArrayList<>(); explicitNodes.addAll(collectExplicitNodes(groupElement)); @@ -407,7 +437,7 @@ public class StorageGroup { nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context)); else if (nodesElement.isPresent() && context.getDeployState().isHosted() && context.getDeployState().zone().environment().isManuallyDeployed() ) // default to 1 node nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context)); - else if (! nodesElement.isPresent() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node + else if (nodesElement.isEmpty() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node nodeRequirement = Optional.of(NodesSpecification.nonDedicated(1, context)); else // Nodes or groups explicitly listed - resolve in GroupBuilder nodeRequirement = Optional.empty(); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java index 2e97cdabd73..669d4ff6a1d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java @@ -5,6 +5,8 @@ import com.yahoo.vespa.model.builder.xml.dom.ModelElement; import com.yahoo.vespa.model.content.IndexedHierarchicDistributionValidator; import com.yahoo.vespa.model.content.Redundancy; +import java.util.Optional; + /** * Builds redundancy config for a content cluster. */ @@ -37,7 +39,13 @@ public class RedundancyBuilder { } } } - public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes) { + public Redundancy build(String clusterName, boolean isHosted, int subGroups, int leafGroups, int totalNodes, + Optional<Integer> maxRedundancy) { + if (maxRedundancy.isPresent()) { + initialRedundancy = Math.min(initialRedundancy, maxRedundancy.get()); + finalRedundancy = Math.min(finalRedundancy, maxRedundancy.get()); + readyCopies = Math.min(readyCopies, maxRedundancy.get()); + } if (isHosted) { return new Redundancy(initialRedundancy, finalRedundancy, readyCopies, leafGroups, totalNodes); } else { diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java index 5a06379a1c2..c6ddc6b0c36 100644 --- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java +++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java @@ -777,7 +777,7 @@ public class ModelProvisioningTest { " </container>" + "</services>"; - int numberOfHosts = 10; + int numberOfHosts = 11; VespaModelTester tester = new VespaModelTester(); tester.addHosts(numberOfHosts); VespaModel model = tester.createModel(services, true, "node-1-3-10-09"); @@ -785,9 +785,9 @@ public class ModelProvisioningTest { // Check slobroks clusters assertEquals("Includes retired node", 1+3, model.getAdmin().getSlobroks().size()); - assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName()); - assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(1).getHostName()); - assertEquals("node-1-3-10-07", model.getAdmin().getSlobroks().get(2).getHostName()); + assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(0).getHostName()); + assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(1).getHostName()); + assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName()); assertEquals("Included in addition because it is retired", "node-1-3-10-09", model.getAdmin().getSlobroks().get(3).getHostName()); } @@ -802,19 +802,19 @@ public class ModelProvisioningTest { " </container>" + "</services>"; - int numberOfHosts = 10; + int numberOfHosts = 12; VespaModelTester tester = new VespaModelTester(); tester.addHosts(numberOfHosts); - VespaModel model = tester.createModel(services, true, "node-1-3-10-01", "node-1-3-10-02"); - assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size()); + VespaModel model = tester.createModel(services, true, "node-1-3-10-03", "node-1-3-10-04"); + assertEquals(10+2, model.getRoot().hostSystem().getHosts().size()); // Check slobroks clusters assertEquals("Includes retired node", 3+2, model.getAdmin().getSlobroks().size()); - assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(0).getHostName()); - assertEquals("node-1-3-10-09", model.getAdmin().getSlobroks().get(1).getHostName()); - assertEquals("node-1-3-10-08", model.getAdmin().getSlobroks().get(2).getHostName()); - assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(3).getHostName()); - assertEquals("Included in addition because it is retired", "node-1-3-10-01", model.getAdmin().getSlobroks().get(4).getHostName()); + assertEquals("node-1-3-10-12", model.getAdmin().getSlobroks().get(0).getHostName()); + assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName()); + assertEquals("node-1-3-10-10", model.getAdmin().getSlobroks().get(2).getHostName()); + assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(3).getHostName()); + assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName()); } @Test @@ -831,22 +831,22 @@ public class ModelProvisioningTest { " </container>" + "</services>"; - int numberOfHosts = 13; + int numberOfHosts = 16; VespaModelTester tester = new VespaModelTester(); tester.addHosts(numberOfHosts); - VespaModel model = tester.createModel(services, true, "node-1-3-10-12", "node-1-3-10-03", "node-1-3-10-02"); + VespaModel model = tester.createModel(services, true, "node-1-3-10-15", "node-1-3-10-05", "node-1-3-10-04"); assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts)); // Check slobroks clusters // ... from cluster default - assertEquals("Includes retired node", 3+3, model.getAdmin().getSlobroks().size()); - assertEquals("node-1-3-10-13", model.getAdmin().getSlobroks().get(0).getHostName()); - assertEquals("node-1-3-10-11", model.getAdmin().getSlobroks().get(1).getHostName()); - assertEquals("Included in addition because it is retired", "node-1-3-10-12", model.getAdmin().getSlobroks().get(2).getHostName()); + assertEquals("Includes retired node", 7, model.getAdmin().getSlobroks().size()); + assertEquals("node-1-3-10-16", model.getAdmin().getSlobroks().get(0).getHostName()); + assertEquals("node-1-3-10-14", model.getAdmin().getSlobroks().get(1).getHostName()); + assertEquals("Included in addition because it is retired", "node-1-3-10-15", model.getAdmin().getSlobroks().get(2).getHostName()); // ... from cluster bar - assertEquals("node-1-3-10-01", model.getAdmin().getSlobroks().get(3).getHostName()); - assertEquals("Included in addition because it is retired", "node-1-3-10-03", model.getAdmin().getSlobroks().get(4).getHostName()); - assertEquals("Included in addition because it is retired", "node-1-3-10-02", model.getAdmin().getSlobroks().get(5).getHostName()); + assertEquals("node-1-3-10-03", model.getAdmin().getSlobroks().get(3).getHostName()); + assertEquals("Included in addition because it is retired", "node-1-3-10-05", model.getAdmin().getSlobroks().get(5).getHostName()); + assertEquals("Included in addition because it is retired", "node-1-3-10-04", model.getAdmin().getSlobroks().get(6).getHostName()); } @Test @@ -967,7 +967,7 @@ public class ModelProvisioningTest { VespaModelTester tester = new VespaModelTester(); tester.addHosts(numberOfHosts); VespaModel model = tester.createModel(services, false); - assertThat(model.getRoot().hostSystem().getHosts().size(), is(numberOfHosts)); + assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size()); ContentCluster cluster = model.getContentClusters().get("bar"); assertEquals(2*3, cluster.redundancy().effectiveInitialRedundancy()); // Reduced from 3*3 @@ -997,6 +997,72 @@ public class ModelProvisioningTest { } @Test + public void testRedundancyWithGroupsTooHighRedundancyAndOneRetiredNode() { + String services = + "<?xml version='1.0' encoding='utf-8' ?>" + + "<services>" + + " <content version='1.0' id='bar'>" + + " <redundancy>2</redundancy>" + // Should have been illegal since we only have 1 node per group + " <documents>" + + " <document type='type1' mode='index'/>" + + " </documents>" + + " <nodes count='2' groups='2'/>" + + " </content>" + + "</services>"; + + int numberOfHosts = 3; + VespaModelTester tester = new VespaModelTester(); + tester.addHosts(numberOfHosts); + VespaModel model = tester.createModel(services, false, "node-1-3-10-03"); + assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size()); + + ContentCluster cluster = model.getContentClusters().get("bar"); + assertEquals(2, cluster.redundancy().effectiveInitialRedundancy()); + assertEquals(2, cluster.redundancy().effectiveFinalRedundancy()); + assertEquals(2, cluster.redundancy().effectiveReadyCopies()); + assertEquals("1|*", cluster.getRootGroup().getPartitions().get()); + assertEquals(0, cluster.getRootGroup().getNodes().size()); + assertEquals(2, cluster.getRootGroup().getSubgroups().size()); + System.out.println("Nodes in group 0: "); + cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n)); + System.out.println("Nodes in group 1: "); + cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n)); + } + + @Test + public void testRedundancyWithGroupsAndThreeRetiredNodes() { + String services = + "<?xml version='1.0' encoding='utf-8' ?>" + + "<services>" + + " <content version='1.0' id='bar'>" + + " <redundancy>1</redundancy>" + + " <documents>" + + " <document type='type1' mode='index'/>" + + " </documents>" + + " <nodes count='2' groups='2'/>" + + " </content>" + + "</services>"; + + int numberOfHosts = 5; + VespaModelTester tester = new VespaModelTester(); + tester.addHosts(numberOfHosts); + VespaModel model = tester.createModel(services, false, "node-1-3-10-05", "node-1-3-10-04", "node-1-3-10-03"); + assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size()); + + ContentCluster cluster = model.getContentClusters().get("bar"); + assertEquals(2, cluster.redundancy().effectiveInitialRedundancy()); + assertEquals(2, cluster.redundancy().effectiveFinalRedundancy()); + assertEquals(2, cluster.redundancy().effectiveReadyCopies()); + assertEquals("1|*", cluster.getRootGroup().getPartitions().get()); + assertEquals(0, cluster.getRootGroup().getNodes().size()); + assertEquals(2, cluster.getRootGroup().getSubgroups().size()); + System.out.println("Nodes in group 0: "); + cluster.getRootGroup().getSubgroups().get(0).getNodes().forEach(n -> System.out.println(" " + n)); + System.out.println("Nodes in group 1: "); + cluster.getRootGroup().getSubgroups().get(1).getNodes().forEach(n -> System.out.println(" " + n)); + } + + @Test public void testUsingNodesCountAttributesAndGettingTooFewNodes() { String services = "<?xml version='1.0' encoding='utf-8' ?>" + @@ -1841,12 +1907,12 @@ public class ModelProvisioningTest { "<services>" + " <container version='1.0' id='zk'>" + " <zookeeper/>" + - " <nodes count='4'/>" + // (3 + 1 retired) + " <nodes count='3'/>" + " </container>" + "</services>"; VespaModelTester tester = new VespaModelTester(); tester.addHosts(4); - VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-01"); + VespaModel model = tester.createModel(servicesXml, true, "node-1-3-10-04"); ApplicationContainerCluster cluster = model.getContainerClusters().get("zk"); assertEquals(1, cluster.getContainers().stream().filter(Container::isRetired).count()); assertEquals(3, cluster.getContainers().stream().filter(c -> !c.isRetired()).count()); diff --git a/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java b/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java index 5f6201c6f2d..fc8904bee7f 100644 --- a/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java +++ b/container-core/src/main/java/com/yahoo/processing/test/ProcessorLibrary.java @@ -169,7 +169,6 @@ public class ProcessorLibrary { List<FutureResponse> futureResponses = new ArrayList<>(chains.size()); for (Chain<? extends Processor> chain : chains) { - futureResponses.add(new AsyncExecution(chain, execution).process(request.clone())); } AsyncExecution.waitForAll(futureResponses, 1000); diff --git a/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java b/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java index 182ec7568f3..28ad202c4f5 100644 --- a/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/searchchain/test/AsyncExecutionOfOneChainTestCase.java @@ -48,20 +48,20 @@ public class AsyncExecutionOfOneChainTestCase { private class ParallelExecutor extends Searcher { /** The number of parallel executions */ - private static final int parallelism=2; + private static final int parallelism = 2; @Override public Result search(Query query, Execution execution) { - List<FutureResult> futureResults=new ArrayList<>(parallelism); - for (int i=0; i<parallelism; i++) + List<FutureResult> futureResults = new ArrayList<>(parallelism); + for (int i = 0; i < parallelism; i++) futureResults.add(new AsyncExecution(execution).search(query.clone())); - Result mainResult=execution.search(query); + Result mainResult = execution.search(query); // Add hits from other threads AsyncExecution.waitForAll(futureResults,query.getTimeLeft()); for (FutureResult futureResult : futureResults) { - Result result=futureResult.get(); + Result result = futureResult.get(); mainResult.mergeWith(result); mainResult.hits().addAll(result.hits().asList()); } @@ -72,7 +72,7 @@ public class AsyncExecutionOfOneChainTestCase { private static class RegularProvider extends Searcher { - private AtomicInteger counter=new AtomicInteger(); + private final AtomicInteger counter = new AtomicInteger(); @Override public Result search(Query query,Execution execution) { |